library(readr)
Sales <- read_delim("Sales.csv", ";", escape_double = FALSE,
trim_ws = TRUE)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Date = col_character(),
## Day = col_double(),
## Month = col_character(),
## Year = col_double(),
## Customer_Age = col_double(),
## Age_Group = col_character(),
## Customer_Gender = col_character(),
## Country = col_character(),
## State = col_character(),
## Product_Category = col_character(),
## Sub_Category = col_character(),
## Product = col_character(),
## Order_Quantity = col_double(),
## Unit_Cost = col_double(),
## Unit_Price = col_double(),
## Profit = col_double(),
## Cost = col_double(),
## Revenue = col_double()
## )
View(Sales)
library(funModeling)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## funModeling v.1.9.4 :)
## Examples and tutorials at livebook.datascienceheroes.com
## / Now in Spanish: librovivodecienciadedatos.ai
df_status(Sales)
## variable q_zeros p_zeros q_na p_na q_inf p_inf type unique
## 1 Date 0 0.00 0 0 0 0 character 1884
## 2 Day 0 0.00 0 0 0 0 numeric 31
## 3 Month 0 0.00 0 0 0 0 character 12
## 4 Year 0 0.00 0 0 0 0 numeric 6
## 5 Customer_Age 0 0.00 0 0 0 0 numeric 70
## 6 Age_Group 0 0.00 0 0 0 0 character 4
## 7 Customer_Gender 0 0.00 0 0 0 0 character 2
## 8 Country 0 0.00 0 0 0 0 character 6
## 9 State 0 0.00 0 0 0 0 character 53
## 10 Product_Category 0 0.00 0 0 0 0 character 3
## 11 Sub_Category 0 0.00 0 0 0 0 character 17
## 12 Product 0 0.00 0 0 0 0 character 130
## 13 Order_Quantity 0 0.00 0 0 0 0 numeric 32
## 14 Unit_Cost 0 0.00 0 0 0 0 numeric 34
## 15 Unit_Price 0 0.00 0 0 0 0 numeric 36
## 16 Profit 424 0.38 0 0 0 0 numeric 1256
## 17 Cost 0 0.00 0 0 0 0 numeric 360
## 18 Revenue 0 0.00 0 0 0 0 numeric 1876
d_na<-df_status(Sales)
## variable q_zeros p_zeros q_na p_na q_inf p_inf type unique
## 1 Date 0 0.00 0 0 0 0 character 1884
## 2 Day 0 0.00 0 0 0 0 numeric 31
## 3 Month 0 0.00 0 0 0 0 character 12
## 4 Year 0 0.00 0 0 0 0 numeric 6
## 5 Customer_Age 0 0.00 0 0 0 0 numeric 70
## 6 Age_Group 0 0.00 0 0 0 0 character 4
## 7 Customer_Gender 0 0.00 0 0 0 0 character 2
## 8 Country 0 0.00 0 0 0 0 character 6
## 9 State 0 0.00 0 0 0 0 character 53
## 10 Product_Category 0 0.00 0 0 0 0 character 3
## 11 Sub_Category 0 0.00 0 0 0 0 character 17
## 12 Product 0 0.00 0 0 0 0 character 130
## 13 Order_Quantity 0 0.00 0 0 0 0 numeric 32
## 14 Unit_Cost 0 0.00 0 0 0 0 numeric 34
## 15 Unit_Price 0 0.00 0 0 0 0 numeric 36
## 16 Profit 424 0.38 0 0 0 0 numeric 1256
## 17 Cost 0 0.00 0 0 0 0 numeric 360
## 18 Revenue 0 0.00 0 0 0 0 numeric 1876
d_na[,c("variable","p_na")]
## variable p_na
## 1 Date 0
## 2 Day 0
## 3 Month 0
## 4 Year 0
## 5 Customer_Age 0
## 6 Age_Group 0
## 7 Customer_Gender 0
## 8 Country 0
## 9 State 0
## 10 Product_Category 0
## 11 Sub_Category 0
## 12 Product 0
## 13 Order_Quantity 0
## 14 Unit_Cost 0
## 15 Unit_Price 0
## 16 Profit 0
## 17 Cost 0
## 18 Revenue 0
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
set.seed(587964)
sample_id<-createDataPartition(Sales$Cost, p=0.50, list=FALSE)
bike1<-Sales[sample_id,]
library(caret)
set.seed(587964)
sample_id<-createDataPartition(bike1$Cost, p=0.50, list=FALSE)
bike2<-bike1[sample_id,]
library(caret)
set.seed(587964)
sample_id<-createDataPartition(bike2$Cost, p=0.50, list=FALSE)
bike3<-bike2[sample_id,]
library(caret)
set.seed(587964)
sample_id<-createDataPartition(bike3$Cost, p=0.50, list=FALSE)
bike4<-bike3[sample_id,]
library(caret)
set.seed(587964)
sample_id<-createDataPartition(bike4$Cost, p=0.50, list=FALSE)
bike5<-bike4[sample_id,]
library(caret)
set.seed(587964)
sample_id<-createDataPartition(bike5$Cost, p=0.50, list=FALSE)
bike6<-bike5[sample_id,]
library(caret)
set.seed(587964)
sample_id<-createDataPartition(bike6$Cost, p=0.50, list=FALSE)
bike_sales<-bike6[sample_id,]
bike_sales_europe<- bike_sales
bike_sales_europe$Date<-NULL
bike_sales_europe$Day <- NULL
library("openxlsx")
write.xlsx(bike_sales_europe, 'bike_sales_europe_sampling.xlsx')
bike_sales_europe$Customer_Gender <- factor(bike_sales_europe$Customer_Gender, levels=c("F","M"))
bike_sales_europe$Product_Category <- factor(bike_sales_europe$Product_Category, levels=c("Accessories","Clothing","Bikes"))
bike_sales_europe$Product_Category <- factor(bike_sales_europe$Sub_Category, levels=c("Bike Racks","Bike Stands","Bottles and Cages","Caps","Cleaners","Fenders","Glove","Helmets","Hydration Pack","Jerseys","Mountain Bikes","Road Bikes","Shorts","Socks","Tires and Tubes","Touring Bikes", "Vests"))
summary(bike_sales_europe)
## Month Year Customer_Age Age_Group
## Length:884 Min. :2011 Min. :17.00 Length:884
## Class :character 1st Qu.:2013 1st Qu.:28.00 Class :character
## Mode :character Median :2014 Median :34.00 Mode :character
## Mean :2014 Mean :36.09
## 3rd Qu.:2016 3rd Qu.:43.00
## Max. :2016 Max. :75.00
##
## Customer_Gender Country State
## F:434 Length:884 Length:884
## M:450 Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Product_Category Sub_Category Product
## Tires and Tubes :257 Length:884 Length:884
## Bottles and Cages:120 Class :character Class :character
## Helmets :119 Mode :character Mode :character
## Road Bikes :106
## Mountain Bikes : 74
## (Other) :184
## NA's : 24
## Order_Quantity Unit_Cost Unit_Price Profit
## Min. : 1.00 Min. : 1.0 Min. : 2.0 Min. : -3.00
## 1st Qu.: 2.00 1st Qu.: 2.0 1st Qu.: 5.0 1st Qu.: 28.75
## Median :10.00 Median : 11.0 Median : 29.5 Median : 95.00
## Mean :11.99 Mean : 267.9 Mean : 455.0 Mean : 308.40
## 3rd Qu.:20.00 3rd Qu.: 42.0 3rd Qu.: 64.0 3rd Qu.: 363.75
## Max. :32.00 Max. :2171.0 Max. :3578.0 Max. :4030.00
##
## Cost Revenue
## Min. : 1.0 Min. : 2.0
## 1st Qu.: 28.0 1st Qu.: 61.0
## Median : 108.5 Median : 232.0
## Mean : 509.5 Mean : 817.9
## 3rd Qu.: 425.2 3rd Qu.: 842.0
## Max. :6513.0 Max. :10305.0
##
rowSums(is.na(bike_sales_europe))
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0
## [223] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## [334] 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [482] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [519] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [556] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [593] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [630] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [667] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [704] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [741] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [778] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [815] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [852] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
colSums(is.na(bike_sales_europe))
## Month Year Customer_Age Age_Group
## 0 0 0 0
## Customer_Gender Country State Product_Category
## 0 0 0 24
## Sub_Category Product Order_Quantity Unit_Cost
## 0 0 0 0
## Unit_Price Profit Cost Revenue
## 0 0 0 0
#Orjinal datayı yedekliyorum #
data_org<-bike_sales_europe
#Veriye eksik değerler atıyorum #
bike_sales_europe_miss<-bike_sales_europe
aa<-sample(1:nrow(bike_sales_europe_miss),floor(nrow(bike_sales_europe_miss)*0.05))
bike_sales_europe_miss$Profit[aa]<-NA
colSums(is.na(bike_sales_europe_miss))
## Month Year Customer_Age Age_Group
## 0 0 0 0
## Customer_Gender Country State Product_Category
## 0 0 0 24
## Sub_Category Product Order_Quantity Unit_Cost
## 0 0 0 0
## Unit_Price Profit Cost Revenue
## 0 44 0 0
#Eksik gözlemlerin yapısını inceliyorum#
library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
md.pattern(bike_sales_europe_miss)
## Month Year Customer_Age Age_Group Customer_Gender Country State
## 816 1 1 1 1 1 1 1
## 44 1 1 1 1 1 1 1
## 24 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0
## Sub_Category Product Order_Quantity Unit_Cost Unit_Price Cost Revenue
## 816 1 1 1 1 1 1 1
## 44 1 1 1 1 1 1 1
## 24 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0
## Product_Category Profit
## 816 1 1 0
## 44 1 0 1
## 24 0 1 1
## 24 44 68
View(bike_sales_europe_miss)
#Basit doldurma ile eksik değerleri dolduruyorum#
bike_sales_europe_miss_ort<-bike_sales_europe_miss
hist(bike_sales_europe_miss_ort$Profit)
bike_sales_europe_miss_ort$Profit[is.na(bike_sales_europe_miss_ort$Profit)]<-mean(bike_sales_europe_miss_ort$Profit,na.rm=TRUE)
#Eksik değerleri doldurduğum veriyi kontrol ediyorum#
View(bike_sales_europe_miss_ort)
#Veriyi eğitim ve test verisi olarak incelemek istiyorum ve %80-%20 ayırıyorum#
library(caret)
set.seed(74367432)
train_id<-createDataPartition(data_org$Profit, p=0.80,
list=FALSE,
times=1)
train<-data_org[train_id,]
test<-data_org[-train_id,]
#Veriyi test ve eeğitim olarak exportluyorum#
library("openxlsx")
write.xlsx(train, 'train.xlsx')
write.xlsx(test, 'test.xlsx')
#DPLYR fonksiyonu ile train verimi özetliyorum#
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
##
## src, summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
glimpse(train)
## Rows: 708
## Columns: 16
## $ Month <chr> "August", "December", "September", "December", "Octob…
## $ Year <dbl> 2013, 2013, 2013, 2013, 2013, 2015, 2013, 2016, 2015,…
## $ Customer_Age <dbl> 42, 35, 37, 34, 44, 23, 34, 37, 42, 38, 43, 29, 27, 3…
## $ Age_Group <chr> "Adults (35-64)", "Adults (35-64)", "Adults (35-64)",…
## $ Customer_Gender <fct> M, F, F, M, M, M, M, F, M, M, M, M, F, F, M, F, M, F,…
## $ Country <chr> "United States", "Australia", "Australia", "Germany",…
## $ State <chr> "Oregon", "Queensland", "Queensland", "Hessen", "Brit…
## $ Product_Category <fct> Bike Racks, Bike Racks, Bike Stands, Bike Stands, Bot…
## $ Sub_Category <chr> "Bike Racks", "Bike Racks", "Bike Stands", "Bike Stan…
## $ Product <chr> "Hitch Rack - 4-Bike", "Hitch Rack - 4-Bike", "All-Pu…
## $ Order_Quantity <dbl> 17, 1, 8, 6, 29, 15, 17, 17, 14, 12, 5, 3, 5, 21, 24,…
## $ Unit_Cost <dbl> 45, 45, 59, 59, 2, 3, 2, 4, 4, 2, 4, 2, 2, 4, 2, 3, 4…
## $ Unit_Price <dbl> 120, 120, 159, 159, 5, 9, 5, 10, 10, 5, 10, 5, 5, 10,…
## $ Profit <dbl> 989, 56, 596, 533, 86, 85, 33, 75, 66, 34, 28, 8, 12,…
## $ Cost <dbl> 765, 45, 472, 354, 58, 45, 34, 68, 56, 24, 20, 6, 10,…
## $ Revenue <dbl> 1754, 101, 1068, 887, 144, 130, 67, 143, 122, 58, 48,…
summary(train)
## Month Year Customer_Age Age_Group
## Length:708 Min. :2011 Min. :17.00 Length:708
## Class :character 1st Qu.:2013 1st Qu.:28.00 Class :character
## Mode :character Median :2014 Median :34.00 Mode :character
## Mean :2014 Mean :35.82
## 3rd Qu.:2016 3rd Qu.:43.00
## Max. :2016 Max. :75.00
##
## Customer_Gender Country State
## F:357 Length:708 Length:708
## M:351 Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Product_Category Sub_Category Product
## Tires and Tubes :205 Length:708 Length:708
## Bottles and Cages: 97 Class :character Class :character
## Helmets : 93 Mode :character Mode :character
## Road Bikes : 86
## Mountain Bikes : 56
## (Other) :155
## NA's : 16
## Order_Quantity Unit_Cost Unit_Price Profit
## Min. : 1.00 Min. : 1.0 Min. : 2.0 Min. : -3.00
## 1st Qu.: 2.00 1st Qu.: 2.0 1st Qu.: 5.0 1st Qu.: 28.75
## Median :10.50 Median : 10.0 Median : 27.0 Median : 95.00
## Mean :12.09 Mean : 266.5 Mean : 451.5 Mean : 311.17
## 3rd Qu.:21.00 3rd Qu.: 42.0 3rd Qu.: 64.0 3rd Qu.: 363.75
## Max. :32.00 Max. :2171.0 Max. :3578.0 Max. :4030.00
##
## Cost Revenue
## Min. : 1.00 Min. : 2.00
## 1st Qu.: 27.75 1st Qu.: 61.75
## Median : 104.50 Median : 213.50
## Mean : 513.62 Mean : 824.79
## 3rd Qu.: 420.00 3rd Qu.: 834.50
## Max. :6513.00 Max. :10305.00
##
#Orjinal verimdeki profit değişkenini özetliyorum#
summary(data_org$Profit)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.00 28.75 95.00 308.40 363.75 4030.00
#Profit değişkenini kategorize ediyorum#
train$Profit_kat[train$Profit >= -3 & train$Profit <= 1350] <- "Az"
## Warning: Unknown or uninitialised column: `Profit_kat`.
train$Profit_kat[train$Profit >= 1351 & train$Profit <= 2690] <- "Orta"
train$Profit_kat[train$Profit >= 2691] <- "Cok"
#Kategorik değişkenlerime faktör ataması yapıyorum#
train<-as.data.frame(train)
train$Customer_Gender<-as.factor(train$Customer_Gender)
train$Product_Category<-as.factor(train$Product_Category)
summary(train)
## Month Year Customer_Age Age_Group
## Length:708 Min. :2011 Min. :17.00 Length:708
## Class :character 1st Qu.:2013 1st Qu.:28.00 Class :character
## Mode :character Median :2014 Median :34.00 Mode :character
## Mean :2014 Mean :35.82
## 3rd Qu.:2016 3rd Qu.:43.00
## Max. :2016 Max. :75.00
##
## Customer_Gender Country State
## F:357 Length:708 Length:708
## M:351 Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Product_Category Sub_Category Product
## Tires and Tubes :205 Length:708 Length:708
## Bottles and Cages: 97 Class :character Class :character
## Helmets : 93 Mode :character Mode :character
## Road Bikes : 86
## Mountain Bikes : 56
## (Other) :155
## NA's : 16
## Order_Quantity Unit_Cost Unit_Price Profit
## Min. : 1.00 Min. : 1.0 Min. : 2.0 Min. : -3.00
## 1st Qu.: 2.00 1st Qu.: 2.0 1st Qu.: 5.0 1st Qu.: 28.75
## Median :10.50 Median : 10.0 Median : 27.0 Median : 95.00
## Mean :12.09 Mean : 266.5 Mean : 451.5 Mean : 311.17
## 3rd Qu.:21.00 3rd Qu.: 42.0 3rd Qu.: 64.0 3rd Qu.: 363.75
## Max. :32.00 Max. :2171.0 Max. :3578.0 Max. :4030.00
##
## Cost Revenue Profit_kat
## Min. : 1.00 Min. : 2.00 Length:708
## 1st Qu.: 27.75 1st Qu.: 61.75 Class :character
## Median : 104.50 Median : 213.50 Mode :character
## Mean : 513.62 Mean : 824.79
## 3rd Qu.: 420.00 3rd Qu.: 834.50
## Max. :6513.00 Max. :10305.00
##
train$Profit_kat<-as.factor(train$Profit_kat)
summary(train)
## Month Year Customer_Age Age_Group
## Length:708 Min. :2011 Min. :17.00 Length:708
## Class :character 1st Qu.:2013 1st Qu.:28.00 Class :character
## Mode :character Median :2014 Median :34.00 Mode :character
## Mean :2014 Mean :35.82
## 3rd Qu.:2016 3rd Qu.:43.00
## Max. :2016 Max. :75.00
##
## Customer_Gender Country State
## F:357 Length:708 Length:708
## M:351 Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Product_Category Sub_Category Product
## Tires and Tubes :205 Length:708 Length:708
## Bottles and Cages: 97 Class :character Class :character
## Helmets : 93 Mode :character Mode :character
## Road Bikes : 86
## Mountain Bikes : 56
## (Other) :155
## NA's : 16
## Order_Quantity Unit_Cost Unit_Price Profit
## Min. : 1.00 Min. : 1.0 Min. : 2.0 Min. : -3.00
## 1st Qu.: 2.00 1st Qu.: 2.0 1st Qu.: 5.0 1st Qu.: 28.75
## Median :10.50 Median : 10.0 Median : 27.0 Median : 95.00
## Mean :12.09 Mean : 266.5 Mean : 451.5 Mean : 311.17
## 3rd Qu.:21.00 3rd Qu.: 42.0 3rd Qu.: 64.0 3rd Qu.: 363.75
## Max. :32.00 Max. :2171.0 Max. :3578.0 Max. :4030.00
##
## Cost Revenue Profit_kat
## Min. : 1.00 Min. : 2.00 Az :679
## 1st Qu.: 27.75 1st Qu.: 61.75 Cok : 8
## Median : 104.50 Median : 213.50 Orta: 21
## Mean : 513.62 Mean : 824.79
## 3rd Qu.: 420.00 3rd Qu.: 834.50
## Max. :6513.00 Max. :10305.00
##
###NOKTA OLCULERI
#3 Nokta Özeti
n<-nrow(train)
train_sorted <- train[order(train$Cost),] #ascending
#ortnc_derinlik<-(n+1)/2 #tek sayı ise
#cift ise:
a<-(n/2)
b<-(n/2)+1
(train_sorted$Cost[a]+train_sorted$Cost[b])/2
## [1] 104.5
median(train$Cost)
## [1] 104.5
mean(train$Cost)
## [1] 513.6215
hist(train$Cost)
##5 Nokta Özeti##
fivenum(train$Cost)
## [1] 1.0 27.5 104.5 420.0 6513.0
###DEGISIM OLCULERI##
stdev<-sd(train$Profit)
mean<-mean(train$Profit)
Degisim_kats_Profit<-(stdev/mean)*100
##MAD(Median Absolute Deviation):
sort <- train[order(train$Profit),]
medianf<-median(sort$Profit)
sort$fmed<-abs(sort$Profit-medianf)
sort2 <- sort[order(sort$fmed),]
mad<-median(sort2$fmed)
##Genişletilmiş Nokta Özeti##
#Sol kuyruk
sol <- function(x) {
c(quantile(x, probs = 1/2) ,
quantile(x, probs = 1/4),
quantile(x, probs =1/8 ),
quantile(x,probs=1/16),
quantile(x,probs=1/32),
quantile(x,probs=1/64)
)
}
#Sag kuyruk
sag <- function(x) {
c(quantile(x, probs = 1/2) ,
quantile(x, probs = 3/4),
quantile(x, probs = 7/8),
quantile(x,probs=15/16),
quantile(x,probs=31/32),
quantile(x,probs=63/64)
)
}
##Kuyruk Uzunlugu Incelemesi##
x_a<-sol(train$Unit_Price)
x_u<-sag(train$Unit_Price)
x_mrg<-as.data.frame(cbind(x_a,x_u))
rownames(x_mrg)<-c("1/2","1/4","1/8","1/16","1/32","1/64")
colnames(x_mrg)<-c("Alt_Kuyruk","Ust_Kuyruk")
x_mrg$orta_nokta<-(x_mrg$Alt_Kuyruk+x_mrg$Ust_Kuyruk)/2
x_mrg
## Alt_Kuyruk Ust_Kuyruk orta_nokta
## 1/2 27 27.000 27.000
## 1/4 5 64.000 34.500
## 1/8 4 2295.000 1149.500
## 1/16 2 2431.938 1216.969
## 1/32 2 3287.625 1644.812
## 1/64 2 3578.000 1790.000
hist(train$Unit_Price)
##Kesilmis ortalama##
p<-0.1
mean(train$Unit_Cost, trim = p)
## [1] 127.8292
#Kalan gozlem sayısı hesaplanmak istenirse:
n<-nrow(train)
ks<-n-(as.integer(2*p*n))
ks
## [1] 567
##Ortalama değerim olan 127.8292’e denk 567 gözlem vardır.##
##Geometrik ortalama##
library("psych")
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
geometric.mean(train$Unit_Cost)
## [1] 16.5863
##Gini##
freq <- as.data.frame(table(train$Unit_Cost))
names(freq)[1] <- 'total fiyat'
gini <- function(a,b) {
a1 <- (a/(a+b))**2
b1 <- (b/(a+b))**2
x<-1-(a1 + b1)
return(x)
}
gn<-gini(freq[1,2],freq[2,2])
k<-2
gn/((k-1)/k)
## [1] 0.9824649
##0.7756233 değeri “1” e cok yakın oldugu icin veriler tüm düzeylere esit dagılmamıstır.##
##Entropi##
entropy<-function(base,a,b) {
var <- abs(((a)/(a+b))*log(((a)/(a+b)),base))-(((b)/(a+b))*log(((b)/(a+b)),base))
return(var)
}
ent<-entropy(10,freq[1,2],freq[2,2])
k<-2
ent/(log(k,10))
## [1] 0.9873139
##0.8314744 değeri ile normalize edilebilir##
##BAR PLOT##
library(dplyr)
tra_pct <- train %>% group_by(Profit_kat, Cost) %>%
dplyr::summarise(count=n()) %>%
mutate(pct=round(count/sum(count),2))
## `summarise()` has grouped output by 'Profit_kat'. You can override using the `.groups` argument.
ggplot(tra_pct, aes(Profit_kat, pct, fill = Cost)) +
geom_bar(stat='identity') +
geom_text(aes(label=scales::percent(pct)), position = position_stack(vjust = .5))+
scale_y_continuous(labels = scales::percent)
##PIE CHART##
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
##
## subplot
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
cross<-as.data.frame(prop.table(table(train$Customer_Age)))
colnames(cross)[1] <- "Young Adult"
plot_ly(cross, labels = ~ Freq, values = ~Freq, type = 'pie')%>% layout(title ='Müşteri yaşlarının dagılımı')
##Müşterilerin yaşlarını pasta grafiğinde görüyoruz, buradan bisiklet almanın yüzde olarak bakıldığında her yaşta tercih edildiğini görsek de ½11.3 ile genç yetişkinlerde daha çok gözlemliyoruz.
##Histogram+Yogunluk bırlıkte##
ggplot(train,aes(Unit_Cost))+
geom_histogram(aes(y=..density..))+
geom_density(alpha=.1,fill="lightblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##Q-Q PLOT##
ggplot(train, aes(sample=Unit_Cost))+stat_qq()
qqnorm(train$Unit_Price)
##BOX PLOT##
ggplot(train, aes(y=Unit_Price))+
geom_boxplot()
ggplot(train, aes(x=Profit_kat,y=Unit_Price, fill=Profit_kat))+
geom_boxplot()+
labs(title="Kar kategorileri bazında total fiyat Kutu Cizimi",x="Kar Kategorileri", y = "FEV")+
scale_fill_discrete(name = "Kar Kategorileri")+
stat_summary(fun = median, geom="line", group= 1, color= "pink", size = 1)
##Kar için inceleme yaparsak çok kar elde edildiğini ama yine de grafiğin dalgalandığını söyleyebiliriz##
##renk değiştirdim##
ggplot(train, aes(x=Profit_kat,y=Unit_Price, fill=Profit_kat)) +
geom_boxplot(outlier.colour="violet", outlier.shape=7,
outlier.size=1)
##BUBBLE PLOT##
ggplot(train, aes(Revenue,Profit, color=Cost, size=Cost))+
geom_point(alpha=0.5)+
scale_color_gradientn(colors =rainbow(unique(train$Cost))) +
theme(legend.position = "right")
##Gelire baktığımızda artan bir grafik gördüğümüzü ama arttığı yerde sürekli olmadığını söyleyebiliriz##
##Scatter plot##
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.2 ✓ stringr 1.4.0
## ✓ tidyr 1.1.3 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x psych::%+%() masks ggplot2::%+%()
## x psych::alpha() masks ggplot2::alpha()
## x plotly::filter() masks dplyr::filter(), mice::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::lift() masks caret::lift()
## x dplyr::src() masks Hmisc::src()
## x dplyr::summarize() masks Hmisc::summarize()
ggplot(train, aes(Cost,Unit_Cost))+
geom_point(size=2,shape=21,stroke=1,color="pink", fill="lightblue")+
geom_smooth(method = "lm", col="violet",se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
##Maaliyet ve total maaliyetin ilişkisini incelediğimizde az ilişkili olduklarını söyleyebiliriz.##
##Hexagonal Binning##
library("hexbin")
ggplot(train,aes(x=Unit_Cost,y=Cost))+
geom_hex(bins=20)+theme_minimal()
##Maaliyet ve total maaliyetin ilişkisini incelediğimizde az ilişkili olduklarını söyleyebiliriz.##
##Contour density##
ggplot(train, aes(x=Unit_Cost, y=Cost) ) +
stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="violet")+theme_classic()
##Maaliyet ve total maaliyetin ilişkisini incelediğimizde az ilişkili olduklarını söyleyebiliriz.##
##Sacılım matrisi- Histogram+Yogunluk+Duzlestırme+Korelasyon##
cor_train<-train[,c(12,13,14)]
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:funModeling':
##
## range01
cor(cor_train)#Korelasyon degerleri
## Unit_Cost Unit_Price Profit
## Unit_Cost 1.0000000 0.9977338 0.7674159
## Unit_Price 0.9977338 1.0000000 0.7826053
## Profit 0.7674159 0.7826053 1.0000000
plot(cor_train)
ggpairs(cor_train)#yogunluk+sacılım+corr
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
chart.Correlation(cor_train, histogram=TRUE, pch=19)
##Profit değerimin sağa çarpık olduğunu gözlemleyebiliriz.##
library(ggplot2)
ggplot(data = train, aes(Cost, Profit, fill =))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
#corr plot
library(corrplot)
## corrplot 0.88 loaded
corrplot(cor(train[,12:14]), method = "ellipse")
corrplot.mixed(cor(train[,12:14]), lower = "ellipse", upper = "circle",tl.col = "black") #ellipse yerine square olabilir
#Kara göre göre Cost üzerinden ortanca ve DAG bulunursa:
library(dplyr)
a<-train %>%group_by(Profit_kat) %>%
summarize(Q1=quantile (Cost, probs=0.25), Median=quantile (Cost, probs=0.50), Q3=quantile(Cost, probs=0.75), DAG=Q3-Q1)
a
## # A tibble: 3 x 5
## Profit_kat Q1 Median Q3 DAG
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Az 27 91 351 324
## 2 Cok 3798 4070 5426. 1628.
## 3 Orta 2852 3756 4342 1490
##Ortanca Izi Cizimi##
ggplot(train, aes(x=Profit_kat,y=Cost, fill=Profit_kat))+
geom_boxplot()+
stat_summary(fun = median, geom="line", group= 1, color= "lightblue", size = 1)
##Profit kategorilerinin gelire göre dağılımında maaliyetin dalgalandığını gözlemliyoruz, ortanca izine göre karın yüksek olduğunu söyleyebiliriz##
##Konum-Varyans Cizimi##
ggplot(a, aes(x=Median,y=DAG, color=Profit_kat, group=1))+
geom_point(size=10,alpha=0.6)+
geom_line(color="pink")
##ETKILESIM##
#2 değişenin birlikte etkisi var mı yok mu bunu incelemek icin:
etk_train<-train%>%
group_by(Cost,Profit_kat)%>%
summarise(Median=median(Cost))
## `summarise()` has grouped output by 'Cost'. You can override using the `.groups` argument.
etk_train
## # A tibble: 214 x 3
## # Groups: Cost [207]
## Cost Profit_kat Median
## <dbl> <fct> <dbl>
## 1 1 Az 1
## 2 2 Az 2
## 3 3 Az 3
## 4 4 Az 4
## 5 5 Az 5
## 6 6 Az 6
## 7 7 Az 7
## 8 8 Az 8
## 9 9 Az 9
## 10 10 Az 10
## # … with 204 more rows
ggplot(etk_train, aes(x = Profit_kat, y = Median,color=Cost,group=Cost)) +
geom_line() +
geom_point()
###Mosaic Plot##
table3 <- xtabs(~Customer_Age+Profit_kat+Unit_Price, data=train)
ftable(table3)
## Unit_Price 2 4 5 8 9 10 21 22 24 25 29 30 33 35 50 54 55 64 70 120 159 540 565 742 769 783 1120 1215 1701 2295 2320 2384 2443 3375 3400 3578
## Customer_Age Profit_kat
## 17 Az 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 18 Az 0 4 3 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 19 Az 1 0 1 0 1 0 1 1 0 0 0 0 0 3 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 20 Az 1 2 3 0 2 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 21 Az 2 0 3 0 1 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 22 Az 2 3 4 1 2 0 0 0 0 0 0 0 0 3 1 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 23 Az 4 2 5 1 2 0 0 2 0 0 1 0 0 3 0 0 1 0 1 0 0 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 24 Az 3 0 2 0 0 0 0 0 0 1 0 0 0 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 25 Az 0 0 5 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
## 26 Az 3 0 4 0 0 1 1 1 1 0 0 0 0 8 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0
## 27 Az 1 0 8 0 2 0 0 0 0 0 0 0 0 4 1 0 0 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 1 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 28 Az 1 0 2 1 5 0 1 0 0 2 0 0 1 2 1 0 0 0 0 0 0 2 0 1 1 0 0 0 2 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 29 Az 3 0 7 0 2 0 0 2 1 3 2 0 1 2 2 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 2 0 0 1
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
## 30 Az 1 1 4 1 1 1 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 1 1 0 0 0 2
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 31 Az 5 1 3 0 2 0 0 0 3 0 0 0 0 6 2 0 0 0 0 0 0 0 0 0 1 2 0 2 0 1 2 0 1 0 0 1
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 32 Az 4 2 3 0 3 1 0 1 0 0 1 1 0 5 1 1 0 0 0 0 0 1 0 0 0 0 3 0 2 0 0 1 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 33 Az 1 0 5 1 1 1 0 0 0 2 0 0 0 9 1 1 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 34 Az 2 0 6 1 1 0 1 0 0 1 0 0 0 4 2 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 3 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 35 Az 1 1 3 0 0 1 0 3 0 0 0 0 0 2 0 0 1 0 0 1 0 1 0 0 0 0 1 1 1 0 2 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 36 Az 5 2 3 0 2 0 0 0 0 0 0 0 0 6 1 1 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 37 Az 4 0 5 0 0 2 0 2 1 0 0 0 0 2 0 1 1 0 1 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1
## 38 Az 2 0 4 1 4 1 0 3 0 1 0 0 0 5 0 2 1 0 0 0 0 0 0 0 1 0 0 0 1 1 2 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 1
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 39 Az 2 0 1 0 0 0 0 0 0 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 40 Az 1 0 3 2 1 0 0 1 0 1 0 0 0 4 2 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 1 3 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0
## 41 Az 3 1 2 1 4 0 0 1 0 0 0 3 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 1
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## 42 Az 1 0 2 1 1 1 0 0 0 0 0 0 0 4 0 1 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 43 Az 3 0 4 0 1 2 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 0 3 0 0 0 0 1
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## 44 Az 0 0 4 0 2 0 0 0 0 0 0 0 0 5 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 2 1 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 45 Az 0 0 3 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 46 Az 1 0 3 0 0 0 0 0 1 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 47 Az 0 2 1 0 0 1 0 1 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 48 Az 1 1 0 0 1 0 1 0 0 0 0 0 0 2 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 49 Az 1 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 50 Az 0 2 1 0 1 0 0 1 0 0 0 0 1 5 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 51 Az 0 1 3 0 0 0 1 0 0 0 1 1 0 2 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 1 1 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 52 Az 2 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 53 Az 2 0 3 0 1 0 2 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 54 Az 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 55 Az 1 0 1 0 2 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 56 Az 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 57 Az 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 58 Az 1 0 2 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 59 Az 0 0 2 0 2 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 60 Az 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 61 Az 1 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 62 Az 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 63 Az 0 0 1 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 64 Az 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 66 Az 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 69 Az 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 70 Az 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 73 Az 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 75 Az 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## Cok 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Orta 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##Chernoff Faces##
library(aplpack)
library(dplyr)
new_data<-train%>%
group_by(Profit_kat) %>%
dplyr::summarize(mean_Cost = mean(Cost),mean_Unit_Price = mean(Unit_Price),mean_Customer_Age = mean(Customer_Age))
faces(new_data[,-1], labels=as.character(new_data$Profit_kat))
## effect of variables:
## modified item Var
## "height of face " "mean_Cost"
## "width of face " "mean_Unit_Price"
## "structure of face" "mean_Customer_Age"
## "height of mouth " "mean_Cost"
## "width of mouth " "mean_Unit_Price"
## "smiling " "mean_Customer_Age"
## "height of eyes " "mean_Cost"
## "width of eyes " "mean_Unit_Price"
## "height of hair " "mean_Customer_Age"
## "width of hair " "mean_Cost"
## "style of hair " "mean_Unit_Price"
## "height of nose " "mean_Customer_Age"
## "width of nose " "mean_Cost"
## "width of ear " "mean_Unit_Price"
## "height of ear " "mean_Customer_Age"
##Star Plot##
data_sorted <- train[order(-train$Cost),]
#Datayı dilimleme##
library(ggplot2)
data_sorted$group <- as.numeric(cut_number(as.numeric(rownames(data_sorted)), 10))
library(dplyr)
data_star<-data_sorted %>%
group_by(group) %>%
dplyr::summarize(Cost= mean(Cost),Profit= mean(Profit),Unit_Cost= mean(Unit_Cost))
stars(data_star[,-1], key.loc = c(15,1.25),main = "Starplot",label=row.names(data_star),cex=.7)
##Starplot grafiğimizden 5. ve 7. kümelerin birbirine yakın gözlemler içerdiğini görüyoruz##
##Trellis plot##
library(lattice)
library(dplyr)
tr_select<-filter(train,between('Customer_Age',7,12))
## Warning in between("Customer_Age", 7, 12): Zorlamadan dolayı ortaya çıkan NAs
xyplot(Cost ~ Profit | 'Customer_Age', data = tr_select)
##Uyum analizi- diff. data##
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
data(housetasks)
dt <- as.table(as.matrix(housetasks))
library(FactoMineR)
res.ca <- CA(housetasks, graph = FALSE)
fviz_ca_biplot(res.ca, repel = TRUE)
##Kumeleme##
dist1<- dist(train, method = "euclidean") # uzaklik matrisi
## Warning in dist(train, method = "euclidean"): Zorlamadan dolayı ortaya çıkan NAs
bikey <- hclust(dist1, method="ward.D")
plot(bikey) # Dendogram çizimi
##Kumeleme##
dist1<- dist(train, method = "euclidean") # uzaklik matrisi
## Warning in dist(train, method = "euclidean"): Zorlamadan dolayı ortaya çıkan NAs
bikey <- hclust(dist1, method="ward.D")
plot(bikey) # Dendogram çizimi
##Radar Grafik##
#Ilk 6 gozlem icin cizim yapilirsa
bike_sel<-train[1:6,]
col_max <- apply(bike_sel, 2, max)
col_min <- apply(bike_sel, 2, min)
col_mean <- apply(bike_sel, 2, mean)
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
col_summary <- t(data.frame(Max = col_max, Min = col_min, Average = col_mean))
dfbike <- as.data.frame(rbind(col_summary, bike_sel))
## Warning in `[<-.factor`(`*tmp*`, ri, value = c(42, 35, 37, 34, 44, 23)): invalid
## factor level, NA generated
## Warning in `[<-.factor`(`*tmp*`, ri, value = c(17, 1, 8, 6, 29, 15)): invalid
## factor level, NA generated
## Warning in `[<-.factor`(`*tmp*`, ri, value = c(17, 1, 8, 6, 29, 15)): invalid
## factor level, NA generated
## Warning in `[<-.factor`(`*tmp*`, ri, value = c(17, 1, 8, 6, 29, 15)): invalid
## factor level, NA generated
## Warning in `[<-.factor`(`*tmp*`, ri, value = c(17, 1, 8, 6, 29, 15)): invalid
## factor level, NA generated
## Warning in `[<-.factor`(`*tmp*`, ri, value = c(17, 1, 8, 6, 29, 15)): invalid
## factor level, NA generated
## Warning in `[<-.factor`(`*tmp*`, ri, value = c(17, 1, 8, 6, 29, 15)): invalid
## factor level, NA generated
dfbike
## Month Year Customer_Age Age_Group Customer_Gender
## Max September 2015 44 Youth (<25) M
## Min August 2013 23 Adults (35-64) F
## Average <NA> <NA> <NA> <NA> <NA>
## 1 August 2013 <NA> Adults (35-64) M
## 2 December 2013 <NA> Adults (35-64) F
## 3 September 2013 <NA> Adults (35-64) F
## 4 December 2013 <NA> Young Adults (25-34) M
## 5 October 2013 44 Adults (35-64) M
## 6 October 2015 23 Youth (<25) M
## Country State Product_Category Sub_Category
## Max United States Queensland Bottles and Cages Bottles and Cages
## Min Australia British Columbia Bike Racks Bike Racks
## Average <NA> <NA> <NA> <NA>
## 1 United States Oregon Bike Racks Bike Racks
## 2 Australia Queensland Bike Racks Bike Racks
## 3 Australia Queensland Bike Stands Bike Stands
## 4 Germany Hessen Bike Stands Bike Stands
## 5 Canada British Columbia Bottles and Cages Bottles and Cages
## 6 United Kingdom England Bottles and Cages Bottles and Cages
## Product Order_Quantity Unit_Cost Unit_Price Profit Cost
## Max Water Bottle - 30 oz. 29 59 159 989 765
## Min All-Purpose Bike Stand 1 2 5 56 45
## Average <NA> <NA> <NA> <NA> <NA> <NA>
## 1 Hitch Rack - 4-Bike <NA> <NA> <NA> 989 765
## 2 Hitch Rack - 4-Bike <NA> <NA> <NA> <NA> <NA>
## 3 All-Purpose Bike Stand <NA> 59 159 <NA> <NA>
## 4 All-Purpose Bike Stand <NA> 59 159 <NA> <NA>
## 5 Water Bottle - 30 oz. 29 <NA> <NA> <NA> <NA>
## 6 Road Bottle Cage <NA> <NA> <NA> <NA> <NA>
## Revenue Profit_kat
## Max 1754 Az
## Min 101 Az
## Average <NA> <NA>
## 1 1754 Az
## 2 <NA> Az
## 3 <NA> Az
## 4 <NA> Az
## 5 <NA> Az
## 6 <NA> Az
##train için##
library(readxl)
train <- read_excel("train.xlsx")
train<-as.data.frame(train)
train$Profit_kat[train$Profit >= -3 & train$Profit <= 1350] <- "Az"
train$Profit_kat[train$Profit >= 1351 & train$Profit <= 2690] <- "Orta"
train$Profit_kat[train$Profit >= 2691] <- "Cok"
train$Age_Group<-ifelse(train$Age_Group>2.5,"Genç","GDegil")
train$Customer_Age<-as.factor(train$Customer_Age)
train$Customer_Gender<-as.factor(train$Customer_Gender)
train$Profit_kat<-as.factor(train$Profit_kat)
summary(train)
## Month Year Customer_Age Age_Group
## Length:708 Min. :2011 29 : 34 Length:708
## Class :character 1st Qu.:2013 31 : 33 Class :character
## Mode :character Median :2014 32 : 32 Mode :character
## Mean :2014 38 : 32
## 3rd Qu.:2016 33 : 27
## Max. :2016 23 : 26
## (Other):524
## Customer_Gender Country State Product_Category
## F:357 Length:708 Length:708 Length:708
## M:351 Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Sub_Category Product Order_Quantity Unit_Cost
## Length:708 Length:708 Min. : 1.00 Min. : 1.0
## Class :character Class :character 1st Qu.: 2.00 1st Qu.: 2.0
## Mode :character Mode :character Median :10.50 Median : 10.0
## Mean :12.09 Mean : 266.5
## 3rd Qu.:21.00 3rd Qu.: 42.0
## Max. :32.00 Max. :2171.0
##
## Unit_Price Profit Cost Revenue
## Min. : 2.0 Min. : -3.00 Min. : 1.00 Min. : 2.00
## 1st Qu.: 5.0 1st Qu.: 28.75 1st Qu.: 27.75 1st Qu.: 61.75
## Median : 27.0 Median : 95.00 Median : 104.50 Median : 213.50
## Mean : 451.5 Mean : 311.17 Mean : 513.62 Mean : 824.79
## 3rd Qu.: 64.0 3rd Qu.: 363.75 3rd Qu.: 420.00 3rd Qu.: 834.50
## Max. :3578.0 Max. :4030.00 Max. :6513.00 Max. :10305.00
##
## Profit_kat
## Az :679
## Cok : 8
## Orta: 21
##
##
##
##
##test için##
library(readxl)
test <- read_excel("test.xlsx")
test<-as.data.frame(test)
test$Profit_kat[test$Profit >= -3 & test$Profit <= 1350] <- "Az"
test$Profit_kat[test$Profit >= 1351 & test$Profit <= 2690] <- "Orta"
test$Profit_kat[test$Profit >= 2691] <- "Cok"
test$Age_Group<-ifelse(test$Age_Group>2.5,"Genç","GDegil")
test$Customer_Age<-as.factor(test$Customer_Age)
test$Customer_Gender<-as.factor(test$Customer_Gender)
test$Profit_kat<-as.factor(test$Profit_kat)
summary(test)
## Month Year Customer_Age Age_Group
## Length:176 Min. :2011 27 : 8 Length:176
## Class :character 1st Qu.:2014 30 : 8 Class :character
## Mode :character Median :2014 34 : 8 Mode :character
## Mean :2014 37 : 8
## 3rd Qu.:2015 50 : 8
## Max. :2016 25 : 7
## (Other):129
## Customer_Gender Country State Product_Category
## F:77 Length:176 Length:176 Length:176
## M:99 Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Sub_Category Product Order_Quantity Unit_Cost
## Length:176 Length:176 Min. : 1.00 Min. : 1.0
## Class :character Class :character 1st Qu.: 2.00 1st Qu.: 2.0
## Mode :character Mode :character Median :10.00 Median : 13.0
## Mean :11.59 Mean : 273.4
## 3rd Qu.:19.00 3rd Qu.: 42.0
## Max. :30.00 Max. :2171.0
##
## Unit_Price Profit Cost Revenue
## Min. : 2.00 Min. : 2.00 Min. : 2 Min. : 4.00
## 1st Qu.: 5.00 1st Qu.: 28.75 1st Qu.: 29 1st Qu.: 59.75
## Median : 35.00 Median : 96.00 Median : 143 Median : 277.00
## Mean : 468.94 Mean : 297.30 Mean : 493 Mean : 790.30
## 3rd Qu.: 58.75 3rd Qu.: 362.00 3rd Qu.: 477 3rd Qu.: 842.00
## Max. :3578.00 Max. :2826.00 Max. :6513 Max. :9339.00
##
## Profit_kat
## Az :172
## Cok : 1
## Orta: 3
##
##
##
##
dt<-table(train$Customer_Gender,train$Customer_Age)
prop.table(dt,2) # Column proportions
##
## 17 18 19 20 21 22 23
## F 0.2857143 0.3636364 0.3000000 0.4615385 0.5000000 0.4000000 0.5000000
## M 0.7142857 0.6363636 0.7000000 0.5384615 0.5000000 0.6000000 0.5000000
##
## 24 25 26 27 28 29 30
## F 0.5000000 0.5000000 0.5769231 0.5000000 0.4090909 0.5294118 0.6000000
## M 0.5000000 0.5000000 0.4230769 0.5000000 0.5909091 0.4705882 0.4000000
##
## 31 32 33 34 35 36 37
## F 0.5757576 0.5625000 0.5185185 0.5000000 0.5000000 0.6521739 0.7200000
## M 0.4242424 0.4375000 0.4814815 0.5000000 0.5000000 0.3478261 0.2800000
##
## 38 39 40 41 42 43 44
## F 0.5625000 0.6250000 0.5833333 0.5238095 0.5333333 0.5238095 0.2941176
## M 0.4375000 0.3750000 0.4166667 0.4761905 0.4666667 0.4761905 0.7058824
##
## 45 46 47 48 49 50 51
## F 0.5454545 0.3333333 0.2857143 0.3000000 0.8000000 0.3076923 0.4666667
## M 0.4545455 0.6666667 0.7142857 0.7000000 0.2000000 0.6923077 0.5333333
##
## 52 53 54 55 56 57 58
## F 0.2222222 0.5000000 0.2500000 0.5714286 0.3333333 0.5000000 0.6000000
## M 0.7777778 0.5000000 0.7500000 0.4285714 0.6666667 0.5000000 0.4000000
##
## 59 60 61 62 63 64 66
## F 0.6000000 1.0000000 0.4000000 0.5000000 0.5000000 0.0000000 0.0000000
## M 0.4000000 0.0000000 0.6000000 0.5000000 0.5000000 1.0000000 1.0000000
##
## 69 70 73 75
## F 0.5000000 1.0000000 1.0000000 0.0000000
## M 0.5000000 0.0000000 0.0000000 1.0000000
round(100*prop.table(dt,2), 2) # Round column prop to 2 digits (percents)
##
## 17 18 19 20 21 22 23 24 25 26
## F 28.57 36.36 30.00 46.15 50.00 40.00 50.00 50.00 50.00 57.69
## M 71.43 63.64 70.00 53.85 50.00 60.00 50.00 50.00 50.00 42.31
##
## 27 28 29 30 31 32 33 34 35 36
## F 50.00 40.91 52.94 60.00 57.58 56.25 51.85 50.00 50.00 65.22
## M 50.00 59.09 47.06 40.00 42.42 43.75 48.15 50.00 50.00 34.78
##
## 37 38 39 40 41 42 43 44 45 46
## F 72.00 56.25 62.50 58.33 52.38 53.33 52.38 29.41 54.55 33.33
## M 28.00 43.75 37.50 41.67 47.62 46.67 47.62 70.59 45.45 66.67
##
## 47 48 49 50 51 52 53 54 55 56
## F 28.57 30.00 80.00 30.77 46.67 22.22 50.00 25.00 57.14 33.33
## M 71.43 70.00 20.00 69.23 53.33 77.78 50.00 75.00 42.86 66.67
##
## 57 58 59 60 61 62 63 64 66 69
## F 50.00 60.00 60.00 100.00 40.00 50.00 50.00 0.00 0.00 50.00
## M 50.00 40.00 40.00 0.00 60.00 50.00 50.00 100.00 100.00 50.00
##
## 70 73 75
## F 100.00 100.00 0.00
## M 0.00 0.00 100.00
addmargins(round(prop.table(dt,2), 2),1)
##
## 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## F 0.29 0.36 0.30 0.46 0.50 0.40 0.50 0.50 0.50 0.58 0.50 0.41 0.53 0.60
## M 0.71 0.64 0.70 0.54 0.50 0.60 0.50 0.50 0.50 0.42 0.50 0.59 0.47 0.40
## Sum 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
##
## 31 32 33 34 35 36 37 38 39 40 41 42 43 44
## F 0.58 0.56 0.52 0.50 0.50 0.65 0.72 0.56 0.62 0.58 0.52 0.53 0.52 0.29
## M 0.42 0.44 0.48 0.50 0.50 0.35 0.28 0.44 0.38 0.42 0.48 0.47 0.48 0.71
## Sum 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
##
## 45 46 47 48 49 50 51 52 53 54 55 56 57 58
## F 0.55 0.33 0.29 0.30 0.80 0.31 0.47 0.22 0.50 0.25 0.57 0.33 0.50 0.60
## M 0.45 0.67 0.71 0.70 0.20 0.69 0.53 0.78 0.50 0.75 0.43 0.67 0.50 0.40
## Sum 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
##
## 59 60 61 62 63 64 66 69 70 73 75
## F 0.60 1.00 0.40 0.50 0.50 0.00 0.00 0.50 1.00 1.00 0.00
## M 0.40 0.00 0.60 0.50 0.50 1.00 1.00 0.50 0.00 0.00 1.00
## Sum 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
library(DescTools)
## Registered S3 method overwritten by 'DescTools':
## method from
## plot.bagplot aplpack
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:aplpack':
##
## plot.bagplot
## The following objects are masked from 'package:psych':
##
## AUC, ICC, SD
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
## The following objects are masked from 'package:Hmisc':
##
## %nin%, Label, Mean, Quantile
Assocs(dt)[15:17,1] #phi and contingency coeff
## Uncertainty Coeff. R|C Uncertainty Coeff. sym Mutual Information
## 0.04489897 0.01418735 0.04489697
library("gplots")
## Registered S3 method overwritten by 'gplots':
## method from
## reorder.factor DescTools
##
## Attaching package: 'gplots'
## The following object is masked from 'package:DescTools':
##
## reorder.factor
## The following object is masked from 'package:PerformanceAnalytics':
##
## textplot
## The following object is masked from 'package:stats':
##
## lowess
balloonplot(t(dt), main ="Cost ve Profit Kategorileri ", xlab ="", ylab="",
label = FALSE,show.margins = FALSE)
##Chi-square##
dt_c<-table(train$Cost,train$Profit_kat)
dtc_exp <- chisq.test(dt_c)$expected
## Warning in chisq.test(dt_c): Chi-squared approximation may be incorrect
rowcs <- function(i, obs, exp) {
sum(((obs[i,] - exp[i,])^2)/exp[i,])
}
chi_dtc<-as.matrix(lapply(seq_len(nrow(dt_c)), rowcs, obs = dt_c, exp = dtc_exp))
rownames(chi_dtc)<-rownames(dt_c)
chi_dtc
## [,1]
## 1 0.2989691
## 2 0.2135493
## 3 0.2989691
## 4 0.2562592
## 5 0.08541973
## 6 0.4270987
## 7 0.2562592
## 8 0.2989691
## 9 0.1708395
## 10 0.2135493
## 11 0.08541973
## 12 0.5552283
## 13 0.4698085
## 14 0.4698085
## 15 0.2562592
## 16 0.2989691
## 17 0.1708395
## 18 0.1708395
## 19 0.08541973
## 20 0.2562592
## 21 0.2135493
## 22 0.2135493
## 23 0.04270987
## 24 0.5979381
## 25 0.04270987
## 26 0.7260677
## 27 0.3843888
## 28 0.4270987
## 29 0.1708395
## 30 0.4270987
## 32 0.1708395
## 33 0.04270987
## 34 0.1708395
## 35 0.04270987
## 36 0.08541973
## 38 0.08541973
## 39 0.1281296
## 40 0.2562592
## 42 0.3843888
## 44 0.2135493
## 45 0.2562592
## 46 0.1281296
## 48 0.3416789
## 49 0.1281296
## 50 0.2562592
## 51 0.08541973
## 52 0.2135493
## 54 0.2562592
## 55 0.04270987
## 56 0.4270987
## 57 0.1281296
## 58 0.2135493
## 60 0.2989691
## 62 0.08541973
## 63 0.1708395
## 64 0.1281296
## 65 0.08541973
## 66 0.04270987
## 68 0.08541973
## 70 0.04270987
## 72 0.04270987
## 76 0.08541973
## 78 0.1708395
## 80 0.04270987
## 81 0.08541973
## 84 0.1281296
## 87 0.04270987
## 88 0.08541973
## 90 0.2135493
## 91 0.2989691
## 96 0.08541973
## 104 0.2562592
## 105 0.08541973
## 112 0.08541973
## 116 0.04270987
## 117 0.2989691
## 119 0.1281296
## 120 0.08541973
## 126 0.1708395
## 130 0.1281296
## 132 0.08541973
## 133 0.04270987
## 135 0.04270987
## 140 0.04270987
## 143 0.2989691
## 144 0.1708395
## 147 0.04270987
## 152 0.08541973
## 156 0.08541973
## 160 0.08541973
## 161 0.04270987
## 165 0.04270987
## 168 0.1708395
## 169 0.08541973
## 171 0.08541973
## 176 0.08541973
## 180 0.04270987
## 182 0.1281296
## 184 0.08541973
## 189 0.04270987
## 190 0.04270987
## 192 0.08541973
## 195 0.04270987
## 196 0.04270987
## 198 0.08541973
## 200 0.08541973
## 203 0.04270987
## 208 0.2562592
## 216 0.04270987
## 220 0.04270987
## 221 0.08541973
## 224 0.08541973
## 228 0.04270987
## 234 0.2562592
## 247 0.08541973
## 252 0.1281296
## 260 0.08541973
## 264 0.04270987
## 273 0.1281296
## 275 0.04270987
## 286 0.1281296
## 294 0.08541973
## 295 0.08541973
## 299 0.2562592
## 304 0.04270987
## 308 0.04270987
## 312 0.2989691
## 325 0.1708395
## 336 0.08541973
## 338 0.3843888
## 344 0.3843888
## 351 0.1281296
## 354 0.04270987
## 364 0.1281296
## 377 0.08541973
## 380 0.08541973
## 390 0.2135493
## 399 0.04270987
## 403 0.08541973
## 420 0.2562592
## 441 0.04270987
## 461 0.1708395
## 462 0.04270987
## 472 0.04270987
## 487 0.3416789
## 494 0.04270987
## 504 0.04270987
## 532 0.1281296
## 567 0.04270987
## 588 0.1281296
## 608 0.04270987
## 616 0.04270987
## 650 0.04270987
## 688 0.04270987
## 713 0.4270987
## 722 0.04270987
## 754 0.04270987
## 755 0.1281296
## 756 0.04270987
## 760 0.04270987
## 765 0.04270987
## 798 0.04270987
## 840 0.04270987
## 874 0.04270987
## 912 0.04270987
## 922 0.04270987
## 924 0.04270987
## 950 0.08541973
## 966 0.04270987
## 1026 0.04270987
## 1032 0.04270987
## 1050 0.08541973
## 1083 0.1708395
## 1092 0.04270987
## 1102 0.04270987
## 1140 0.04270987
## 1176 0.04270987
## 1218 0.04270987
## 1252 0.4698085
## 1260 0.04270987
## 1266 0.640648
## 1376 0.04270987
## 1426 0.2562592
## 1482 0.2989691
## 1510 0.04270987
## 1519 0.04270987
## 1555 0.4270987
## 1898 0.04270987
## 1912 32.71429
## 1948 0.04270987
## 2166 0.1281296
## 2171 0.4270987
## 2265 0.04270987
## 2504 130.8571
## 2852 32.71429
## 3038 9.628375
## 3110 33.18704
## 3756 98.14286
## 3796 59.10714
## 3798 203.5536
## 4332 0.04270987
## 4342 52.09996
## 4446 0.04270987
## 5008 32.71429
## 5064 87.5
## 5694 32.71429
## 6513 118.2143
##Ki karelerini hesaplıyoruz##
##Cost-Unit_Cost## ##train için##
ggplot(train, aes(Cost,Unit_Cost))+
geom_point()+
geom_smooth(method = "loess", col="violet",se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
train$Unit_Cost_log<-log10(train$Unit_Cost)
##maaliyetin artarak azaldığını söyleyebiliriz## ##test için##
ggplot(test, aes(Cost,Unit_Cost))+
geom_point()+
geom_smooth(method = "loess", col="violet",se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
test$Unit_Cost_log<-log10(test$Unit_Cost)
##maaliyetin artarak azaldığını söyleyebiliriz##
##Revenue-Unit_Cost## ##train içn##
hist(train$Revenue)
train$Revenue_log<-log10(train$Revenue)
hist(train$Revenue_log)
train$Revenue_kok<-sqrt(train$Revenue)
hist(train$Revenue_kok)
ggplot(train, aes(Revenue_kok,Unit_Cost_log))+
geom_point(size=1)+
geom_text(label=rownames(train),nudge_x=0.04,check_overlap=T,size=2.5)+
geom_smooth(method = "loess", col="pink",se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
##logaritmik dönüşüm yaparak gelirin artarak azaldığını söyleyebiliriz##
##test için##
hist(test$Revenue)
test$Revenue_log<-log10(test$Revenue)
hist(test$Revenue_log)
test$Revenue_kok<-sqrt(test$Revenue)
hist(test$Revenue_kok)
##test verisinde atadığımız revenue değişkenlerinin baktığımızda değerlerini gözlemliyoruz##
##Profit kategorisi - Donusturulmus Profit kutu cizimi##
is_outlier <- function(x) {
return(x < quantile(x, 0.25) - 1.5 * IQR(x) | x > quantile(x, 0.75) + 1.5 * IQR(x))
}
library(dplyr)
dat <- train %>% tibble::rownames_to_column(var="outlier") %>% group_by(Profit_kat) %>% mutate(is_outlier=ifelse(is_outlier(Unit_Cost_log), Unit_Cost_log_log, as.numeric(NA)))
dat$outlier[which(is.na(dat$is_outlier))] <- as.numeric(NA)
ggplot(dat, aes(y=Unit_Cost_log, x=factor(Profit_kat),fill=Profit_kat))+
geom_boxplot() +
geom_text(aes(label=outlier),na.rm=TRUE,nudge_x=0.15,size=3.5)+
labs(title="Kar kategorileri bazında log(maaliyet) Kutu Cizimi",x="kar Kategorileri", y = "log(maaliyet)")+
scale_fill_discrete(name = "Kar Kategorileri")
###polinomial #merkezilestirme:## ##train için##
mean_revenue<-mean(train$Revenue)
train$Revenue_merk<-(train$Revenue-mean_revenue)
library(ggplot2)
ggplot(train, aes(x = Revenue_merk, y =Unit_Cost_log )) +
stat_smooth(method = "lm", se = FALSE, color = "violet", formula = y ~ x) +
stat_smooth(method = "lm", se = FALSE, color = "lightblue", formula = y ~ x + I(x ^ 2)) +
stat_smooth(method = "lm", se = FALSE, color = "pink", formula = y ~ x + I(x ^ 2)+ I(x ^ 3)) +
geom_point(colour = "black", size = 1)
##Geliri merkezleştirip grafiği inceliyoruz## ##Noktaları en iyi temsil eden çizgi pembe çizgidir. Bu da kübik dönüşüm gerektiğini gösterir##
##test için##
mean_revenue<-mean(test$Revenue)
test$Revenue_merk<-(test$Revenue-mean_revenue)
library(ggplot2)
ggplot(test, aes(x = Revenue_merk, y =Unit_Cost_log )) +
stat_smooth(method = "lm", se = FALSE, color = "violet", formula = y ~ x) +
stat_smooth(method = "lm", se = FALSE, color = "lightblue", formula = y ~ x + I(x ^ 2)) +
stat_smooth(method = "lm", se = FALSE, color = "pink", formula = y ~ x + I(x ^ 2)+ I(x ^ 3)) +
geom_point(colour = "black", size = 1)
##Geliri merkezleştirip grafiği inceliyoruz## ##Noktaları en iyi temsil eden çizgi pembe çizgidir. Bu da kübik dönüşüm gerektiğini gösterir##
##köklü geliri merkezilestirip karesel terimlerine bakma:## ##train için##
mean_Revenuekok<-mean(train$Revenue_kok)
train$Revenue_kok_merk<-(train$Revenue_kok-mean_Revenuekok)
library(ggplot2)
ggplot(train, aes(x = Revenue_kok_merk, y =Unit_Cost_log )) +
stat_smooth(method = "lm", se = FALSE, color = "violet", formula = y ~ x) +
stat_smooth(method = "lm", se = FALSE, color = "lightblue", formula = y ~ x + I(x ^ 2)) +
stat_smooth(method = "lm", se = FALSE, color = "pink", formula = y ~ x + I(x ^ 2)+ I(x ^ 3)) +
geom_point(colour = "black", size = 1)
##test için##
mean_Revenuekok<-mean(test$Revenue_kok)
test$Revenue_kok_merk<-(test$Revenue_kok-mean_Revenuekok)
library(ggplot2)
ggplot(test, aes(x = Revenue_kok_merk, y =Unit_Cost_log )) +
stat_smooth(method = "lm", se = FALSE, color = "violet", formula = y ~ x) +
stat_smooth(method = "lm", se = FALSE, color = "lightblue", formula = y ~ x + I(x ^ 2)) +
stat_smooth(method = "lm", se = FALSE, color = "pink", formula = y ~ x + I(x ^ 2)+ I(x ^ 3)) +
geom_point(colour = "black", size = 1)
##Tukey’s Ladder ## ##train için##
library(rcompanion)
##
## Attaching package: 'rcompanion'
## The following object is masked from 'package:psych':
##
## phi
Unit_Cost_tukey<-transformTukey(train$Unit_Cost,plotit=FALSE)
##
## lambda W Shapiro.p.value
## 390 -0.275 0.9195 5.705e-19
##
## if (lambda > 0){TRANS = x ^ lambda}
## if (lambda == 0){TRANS = log(x)}
## if (lambda < 0){TRANS = -1 * x ^ lambda}
Revenue_tukey<- transformTukey(train$Revenue, plotit=FALSE)
##
## lambda W Shapiro.p.value
## 402 0.025 0.99 9.739e-05
##
## if (lambda > 0){TRANS = x ^ lambda}
## if (lambda == 0){TRANS = log(x)}
## if (lambda < 0){TRANS = -1 * x ^ lambda}
##test için##
library(rcompanion)
Unit_Cost_tukey<-transformTukey(test$Unit_Cost,plotit=FALSE)
##
## lambda W Shapiro.p.value
## 391 -0.25 0.9161 1.668e-08
##
## if (lambda > 0){TRANS = x ^ lambda}
## if (lambda == 0){TRANS = log(x)}
## if (lambda < 0){TRANS = -1 * x ^ lambda}
Revenue_tukey<- transformTukey(test$Revenue, plotit=FALSE)
##
## lambda W Shapiro.p.value
## 402 0.025 0.9811 0.01713
##
## if (lambda > 0){TRANS = x ^ lambda}
## if (lambda == 0){TRANS = log(x)}
## if (lambda < 0){TRANS = -1 * x ^ lambda}
##BOX-COX##
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
#Unit_Cost icin
Box_Unit_Cost<- boxcox(train$Unit_Cost ~ 1,
lambda = seq(-6,6,0.1)) # Try values -6 to 6 by 0.1
Cox_Unit_Cost<- data.frame(Box_Unit_Cost$x, Box_Unit_Cost$y)
Cox_Unit_Cost <- Cox_Unit_Cost[order(-Cox_Unit_Cost$Box_Unit_Cost.y),]
Cox_Unit_Cost[1,]
## Box_Unit_Cost.x Box_Unit_Cost.y
## 59 -0.2 -2890.729
lambda <- Cox_Unit_Cost[1, "Box_Unit_Cost.x"]
lambda
## [1] -0.2
##çarpıklık gözlemlemiyoruz##
##Revenue icin##
Box_Revenue<- boxcox(train$Revenue ~ 1,
lambda = seq(-6,6,0.1)) # Try values -6 to 6 by 0.1
Cox_Revenue<- data.frame(Box_Revenue$x, Box_Revenue$y)
Cox_Revenue <- Cox_Revenue[order(-Cox_Revenue$Box_Revenue.y),]
Cox_Revenue[1,]
## Box_Revenue.x Box_Revenue.y
## 61 0 -2721.933
lambda_Revenue<- Cox_Revenue[1, "Box_Revenue.x"]
lambda_Revenue
## [1] 0
##çarpıklık gözlemlemiyoruz##
##ham hali uzerınden sacılım matrisi##
orj<-train[,c(15,12,16)] # Bagımlı degisken sag alt koseye alındı
library(PerformanceAnalytics)
chart.Correlation(orj, histogram=TRUE, pch=19)
plot(train$Cost,train$Unit_Cost) #kontrol
plot(train$Revenue,train$Unit_Cost) #kontrol
##donusturulmus degiskenler uzerinden sacılım matrisi##
transform_train<-train[,c(15,12,16)]
chart.Correlation(transform_train, histogram=TRUE, pch=19)
##Veriyi dönüştürdükten sonra saçılım matrisinde büyük bir değişme olmadığını gözlemliyoruz##
##test kumesi##
test$Customer_Gender<-as.factor(test$Customer_Gender)
test$Customer_Gender<-as.factor(test$Customer_Gender)
summary(test)
## Month Year Customer_Age Age_Group
## Length:176 Min. :2011 27 : 8 Length:176
## Class :character 1st Qu.:2014 30 : 8 Class :character
## Mode :character Median :2014 34 : 8 Mode :character
## Mean :2014 37 : 8
## 3rd Qu.:2015 50 : 8
## Max. :2016 25 : 7
## (Other):129
## Customer_Gender Country State Product_Category
## F:77 Length:176 Length:176 Length:176
## M:99 Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Sub_Category Product Order_Quantity Unit_Cost
## Length:176 Length:176 Min. : 1.00 Min. : 1.0
## Class :character Class :character 1st Qu.: 2.00 1st Qu.: 2.0
## Mode :character Mode :character Median :10.00 Median : 13.0
## Mean :11.59 Mean : 273.4
## 3rd Qu.:19.00 3rd Qu.: 42.0
## Max. :30.00 Max. :2171.0
##
## Unit_Price Profit Cost Revenue
## Min. : 2.00 Min. : 2.00 Min. : 2 Min. : 4.00
## 1st Qu.: 5.00 1st Qu.: 28.75 1st Qu.: 29 1st Qu.: 59.75
## Median : 35.00 Median : 96.00 Median : 143 Median : 277.00
## Mean : 468.94 Mean : 297.30 Mean : 493 Mean : 790.30
## 3rd Qu.: 58.75 3rd Qu.: 362.00 3rd Qu.: 477 3rd Qu.: 842.00
## Max. :3578.00 Max. :2826.00 Max. :6513 Max. :9339.00
##
## Profit_kat Unit_Cost_log Revenue_log Revenue_kok Revenue_merk
## Az :172 Min. :0.000 Min. :0.6021 Min. : 2.00 Min. :-786.3
## Cok : 1 1st Qu.:0.301 1st Qu.:1.7763 1st Qu.: 7.73 1st Qu.:-730.6
## Orta: 3 Median :1.114 Median :2.4425 Median :16.64 Median :-513.3
## Mean :1.262 Mean :2.3919 Mean :21.81 Mean : 0.0
## 3rd Qu.:1.623 3rd Qu.:2.9253 3rd Qu.:29.02 3rd Qu.: 51.7
## Max. :3.337 Max. :3.9703 Max. :96.64 Max. :8548.7
##
## Revenue_kok_merk
## Min. :-19.806
## 1st Qu.:-14.076
## Median : -5.162
## Mean : 0.000
## 3rd Qu.: 7.211
## Max. : 74.833
##
##Secenek modeller## ##train için##
fit1<-lm(Unit_Cost ~ Revenue+ Cost+Customer_Gender, data=train)
summary(fit1)
##
## Call:
## lm(formula = Unit_Cost ~ Revenue + Cost + Customer_Gender, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1143.60 -78.97 -38.52 -17.41 1127.94
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.695804 16.649217 0.883 0.378
## Revenue -0.004189 0.049389 -0.085 0.932
## Cost 0.480449 0.073980 6.494 1.58e-10 ***
## Customer_GenderM 17.185351 21.832776 0.787 0.431
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 289.9 on 704 degrees of freedom
## Multiple R-squared: 0.7209, Adjusted R-squared: 0.7198
## F-statistic: 606.3 on 3 and 704 DF, p-value: < 2.2e-16
##Revenue ve customer genderın anlamsız olduğu gözlemleniyor##
##test için##
fit1<-lm(Unit_Cost ~ Revenue+ Cost+Customer_Gender, data=test)
summary(fit1)
##
## Call:
## lm(formula = Unit_Cost ~ Revenue + Cost + Customer_Gender, data = test)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1054.11 -129.04 -10.98 23.74 894.89
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -33.34444 33.67622 -0.990 0.3235
## Revenue 0.57172 0.09968 5.736 4.29e-08 ***
## Cost -0.31949 0.14403 -2.218 0.0278 *
## Customer_GenderM 22.02270 41.35447 0.533 0.5950
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 271.3 on 172 degrees of freedom
## Multiple R-squared: 0.7563, Adjusted R-squared: 0.7521
## F-statistic: 177.9 on 3 and 172 DF, p-value: < 2.2e-16
##Customer genderın anlamsız olduğu gözlemleniyor##
##Tahmin##
predictions <- predict(fit1, test) #test uzerınden
##Model performans##
#train:
round(defaultSummary(data.frame(obs=train$Unit_Cost,pred=predict(fit1,train))),2)
## RMSE Rsquared MAE
## 320.55 0.68 179.60
#test:
round(defaultSummary(data.frame(obs=test$Unit_Cost,pred=predict(fit1,test))),2)
## RMSE Rsquared MAE
## 268.21 0.76 166.21
library(ggfortify)
autoplot(fit1)
## Warning: `arrange_()` was deprecated in dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
#Modelleme- polinomial #merkezilestirilmis uzerinden##
fit2<-lm(Unit_Cost_log ~ Revenue_kok_merk + I(Revenue_kok_merk^2)+I(Revenue_kok_merk^3)+Customer_Gender+Cost , data = train)
summary(fit2)
##
## Call:
## lm(formula = Unit_Cost_log ~ Revenue_kok_merk + I(Revenue_kok_merk^2) +
## I(Revenue_kok_merk^3) + Customer_Gender + Cost, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9875 -0.3367 -0.1033 0.2520 1.2640
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.129e+00 4.171e-02 27.068 < 2e-16 ***
## Revenue_kok_merk 4.220e-02 3.540e-03 11.920 < 2e-16 ***
## I(Revenue_kok_merk^2) -7.710e-04 1.309e-04 -5.892 5.94e-09 ***
## I(Revenue_kok_merk^3) -4.265e-06 1.349e-06 -3.162 0.00163 **
## Customer_GenderM 1.007e-02 3.401e-02 0.296 0.76728
## Cost 7.895e-04 1.174e-04 6.724 3.66e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4516 on 702 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8202
## F-statistic: 646.2 on 5 and 702 DF, p-value: < 2.2e-16
##Customer genderın anlamsız olduğu gözlemleniyor##
fit2<-lm(Unit_Cost_log ~ Revenue_kok +Cost+Customer_Gender, data = train)
summary(fit2)
##
## Call:
## lm(formula = Unit_Cost_log ~ Revenue_kok + Cost + Customer_Gender,
## data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3272 -0.3586 -0.1083 0.2775 1.2920
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.047e-01 4.399e-02 -2.381 0.0175 *
## Revenue_kok 7.194e-02 2.662e-03 27.029 <2e-16 ***
## Cost -4.794e-04 5.131e-05 -9.343 <2e-16 ***
## Customer_GenderM 2.620e-02 3.779e-02 0.693 0.4884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5022 on 704 degrees of freedom
## Multiple R-squared: 0.7787, Adjusted R-squared: 0.7778
## F-statistic: 825.8 on 3 and 704 DF, p-value: < 2.2e-16
##Customer genderın anlamsız olduğu gözlemleniyor##
fit2_res<-as.data.frame(t(defaultSummary(data.frame(obs=train$Unit_Cost_log,pred=predict(fit2,train)))))
rownames(fit2_res)<-"fit2"
fit3<-lm(Unit_Cost_log ~ Revenue_kok_merk + I(Revenue_kok_merk^2)+I(Revenue_kok_merk^3)+Customer_Gender+Customer_Gender*Revenue_kok_merk , data = train)
summary(fit3)
##
## Call:
## lm(formula = Unit_Cost_log ~ Revenue_kok_merk + I(Revenue_kok_merk^2) +
## I(Revenue_kok_merk^3) + Customer_Gender + Customer_Gender *
## Revenue_kok_merk, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2204 -0.3305 -0.1002 0.2549 1.2786
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.319e+00 3.168e-02 41.627 < 2e-16 ***
## Revenue_kok_merk 6.353e-02 1.689e-03 37.601 < 2e-16 ***
## I(Revenue_kok_merk^2) -1.332e-04 9.297e-05 -1.433 0.152
## I(Revenue_kok_merk^3) -5.863e-06 1.370e-06 -4.279 2.14e-05 ***
## Customer_GenderM 2.116e-02 3.504e-02 0.604 0.546
## Revenue_kok_merk:Customer_GenderM 1.278e-03 1.863e-03 0.686 0.493
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4658 on 702 degrees of freedom
## Multiple R-squared: 0.8101, Adjusted R-squared: 0.8088
## F-statistic: 599.1 on 5 and 702 DF, p-value: < 2.2e-16
##Revenue_kok_merk^3ün ve Customer genderın anlamsız olduğu gözlemleniyor##
fit3_res<-as.data.frame(t(defaultSummary(data.frame(obs=train$Unit_Cost_log,pred=predict(fit3,train)))))
rownames(fit3_res)<-"fit3"
fit4<-lm(Unit_Cost_log ~ Cost+Customer_Gender, data = train)
summary(fit4)
##
## Call:
## lm(formula = Unit_Cost_log ~ Cost + Customer_Gender, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.72091 -0.53290 -0.00865 0.28461 1.49277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.094e-01 4.012e-02 20.17 <2e-16 ***
## Cost 8.058e-04 2.752e-05 29.29 <2e-16 ***
## Customer_GenderM -7.020e-03 5.388e-02 -0.13 0.896
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7163 on 705 degrees of freedom
## Multiple R-squared: 0.5491, Adjusted R-squared: 0.5478
## F-statistic: 429.3 on 2 and 705 DF, p-value: < 2.2e-16
##Customer genderın anlamsız olduğu gözlemleniyor##
fit4<-lm(Unit_Cost_log ~ Cost+Customer_Gender, data = train)
summary(fit4)
##
## Call:
## lm(formula = Unit_Cost_log ~ Cost + Customer_Gender, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.72091 -0.53290 -0.00865 0.28461 1.49277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.094e-01 4.012e-02 20.17 <2e-16 ***
## Cost 8.058e-04 2.752e-05 29.29 <2e-16 ***
## Customer_GenderM -7.020e-03 5.388e-02 -0.13 0.896
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7163 on 705 degrees of freedom
## Multiple R-squared: 0.5491, Adjusted R-squared: 0.5478
## F-statistic: 429.3 on 2 and 705 DF, p-value: < 2.2e-16
##Customer genderın anlamsız olduğu gözlemleniyor##
fit4_res<-as.data.frame(t(defaultSummary(data.frame(obs=train$Unit_Cost_log,pred=predict(fit4,train)))))
rownames(fit4_res)<-"fit4"
fit5<-lm(Unit_Cost_log ~ Cost, data = train)
summary(fit5)
##
## Call:
## lm(formula = Unit_Cost_log ~ Cost, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.71662 -0.53230 -0.00621 0.28706 1.48923
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.059e-01 3.038e-02 26.53 <2e-16 ***
## Cost 8.057e-04 2.748e-05 29.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7158 on 706 degrees of freedom
## Multiple R-squared: 0.5491, Adjusted R-squared: 0.5484
## F-statistic: 859.7 on 1 and 706 DF, p-value: < 2.2e-16
fit5_res<-as.data.frame(t(defaultSummary(data.frame(obs=train$Unit_Cost_log,pred=predict(fit5,train)))))
rownames(fit5_res)<-"fit5"
round(rbind(fit2_res,fit3_res,fit4_res,fit5_res),3)
## RMSE Rsquared MAE
## fit2 0.501 0.779 0.402
## fit3 0.464 0.810 0.370
## fit4 0.715 0.549 0.546
## fit5 0.715 0.549 0.546
##R-squared değerinde en büyük fit 2 modeli olduğu için fit 3ü seçiyoruz##
na.omit(test)
## Month Year Customer_Age Age_Group Customer_Gender Country
## 1 August 2015 53 Genç M United States
## 2 April 2016 50 Genç F Australia
## 3 June 2014 18 Genç F Australia
## 4 October 2015 23 Genç M United Kingdom
## 5 January 2016 36 Genç F Australia
## 6 June 2014 49 Genç F United States
## 7 March 2016 32 Genç F United States
## 8 April 2014 28 Genç M United States
## 9 November 2013 40 Genç F United States
## 10 April 2014 29 Genç F United Kingdom
## 11 March 2014 27 Genç M Australia
## 12 June 2014 24 Genç M United States
## 13 April 2016 43 Genç M Germany
## 14 July 2016 37 Genç F United States
## 15 September 2013 52 Genç F United States
## 16 September 2013 43 Genç M United States
## 17 June 2014 45 Genç F United States
## 18 January 2016 31 Genç F Australia
## 19 April 2014 31 Genç F Australia
## 20 July 2013 36 Genç M United States
## 21 January 2014 59 Genç F Canada
## 22 June 2016 57 Genç M Australia
## 23 April 2014 40 Genç F Australia
## 24 April 2016 34 Genç M United States
## 25 November 2013 30 Genç M United Kingdom
## 26 January 2014 42 Genç M United States
## 27 September 2013 44 Genç F United Kingdom
## 28 September 2013 25 Genç F Australia
## 29 May 2014 53 Genç M Australia
## 30 January 2014 24 Genç M Australia
## 31 April 2016 34 Genç M United States
## 32 January 2014 44 Genç F Australia
## 33 February 2016 50 Genç M Germany
## 38 January 2016 35 Genç M France
## 39 April 2016 61 Genç M United Kingdom
## 40 February 2014 37 Genç F Australia
## 41 November 2013 25 Genç F Canada
## 42 September 2015 36 Genç F United States
## 43 December 2015 48 Genç M United Kingdom
## 44 April 2014 51 Genç M Canada
## 45 September 2015 42 Genç M France
## 46 September 2015 23 Genç F Australia
## 47 July 2015 36 Genç M Australia
## 48 July 2016 28 Genç F Germany
## 49 May 2014 51 Genç M United States
## 50 September 2015 33 Genç F Australia
## 51 April 2014 37 Genç M United Kingdom
## 52 December 2013 30 Genç F United States
## 53 May 2016 50 Genç F United Kingdom
## 54 January 2014 34 Genç M Canada
## 55 October 2013 52 Genç M United States
## 56 December 2015 18 Genç M Canada
## 57 May 2014 20 Genç M United States
## 58 September 2015 35 Genç F United Kingdom
## 59 April 2016 50 Genç F Australia
## 60 September 2015 44 Genç M United States
## 61 April 2014 44 Genç M United States
## 62 February 2016 30 Genç M United Kingdom
## 63 March 2014 48 Genç F United States
## 68 January 2014 27 Genç F Germany
## 69 January 2016 39 Genç F United Kingdom
## 70 April 2016 35 Genç M Australia
## 71 March 2014 36 Genç F Australia
## 72 January 2014 31 Genç M Canada
## 73 November 2015 32 Genç M United States
## 74 September 2013 23 Genç F Australia
## 75 August 2015 68 Genç M United States
## 76 January 2016 39 Genç M Australia
## 77 March 2016 55 Genç M United States
## 78 January 2016 54 Genç M United States
## 79 November 2015 47 Genç M United Kingdom
## 80 November 2013 27 Genç M Australia
## 81 February 2014 32 Genç M Australia
## 82 June 2015 24 Genç M United States
## 83 January 2015 22 Genç F United Kingdom
## 84 May 2014 33 Genç F Germany
## 85 February 2014 34 Genç M Australia
## 86 December 2015 46 Genç M United Kingdom
## 87 May 2014 30 Genç M United States
## 88 October 2013 48 Genç M Australia
## 89 November 2013 45 Genç F United States
## 90 April 2015 35 Genç F Australia
## 91 November 2013 54 Genç M United States
## 92 December 2015 54 Genç M United States
## 93 June 2016 28 Genç M France
## 94 September 2011 27 Genç M United States
## 95 August 2013 27 Genç M United States
## 96 May 2016 46 Genç M United States
## 97 June 2016 34 Genç F United States
## 98 August 2013 38 Genç M United States
## 99 February 2016 52 Genç F France
## 100 April 2012 24 Genç M United States
## 101 September 2015 45 Genç M United States
## 102 March 2012 60 Genç F United States
## 103 April 2011 30 Genç M United States
## 104 March 2011 39 Genç M United States
## 105 December 2013 21 Genç F United Kingdom
## 106 March 2012 44 Genç M Australia
## 107 October 2015 18 Genç F Australia
## 108 November 2015 46 Genç M Australia
## 109 January 2012 34 Genç M United States
## 110 September 2015 34 Genç M United States
## 111 January 2016 35 Genç F Australia
## 112 February 2014 27 Genç F France
## 113 November 2015 25 Genç F Germany
## 114 June 2014 56 Genç F Germany
## 115 May 2016 26 Genç M France
## 116 September 2011 41 Genç M United States
## 117 March 2015 30 Genç M United Kingdom
## 118 April 2016 44 Genç F United States
## 119 February 2016 39 Genç F United Kingdom
## 120 January 2014 21 Genç M United States
## 121 July 2016 50 Genç F Australia
## 122 July 2014 53 Genç F United States
## 123 October 2015 27 Genç F United States
## 124 March 2014 26 Genç F United States
## 125 February 2014 25 Genç M Canada
## 126 February 2016 37 Genç F Australia
## 127 March 2014 64 Genç M United States
## 128 January 2016 37 Genç M Canada
## 129 October 2013 54 Genç M Canada
## 130 June 2014 24 Genç M Australia
## 131 September 2013 71 Genç M United Kingdom
## 132 February 2016 20 Genç M France
## 133 July 2014 31 Genç F Australia
## 134 August 2015 25 Genç F United States
## 135 March 2014 39 Genç F Australia
## 136 November 2015 38 Genç F United Kingdom
## 137 January 2016 22 Genç M Australia
## 138 January 2016 32 Genç F United States
## 139 June 2016 28 Genç M Germany
## 140 March 2014 33 Genç M United States
## 141 May 2016 37 Genç M United States
## 142 November 2013 54 Genç M United States
## 143 June 2014 38 Genç M Canada
## 144 January 2014 56 Genç F United States
## 145 January 2016 41 Genç F United States
## 146 July 2014 49 Genç F Canada
## 147 April 2016 25 Genç M United Kingdom
## 148 January 2014 26 Genç M Germany
## 149 July 2013 40 Genç F Canada
## 150 October 2015 18 Genç M Australia
## 151 June 2014 32 Genç M Germany
## 152 December 2015 32 Genç F United Kingdom
## 153 April 2014 49 Genç M Canada
## 154 December 2013 33 Genç M Germany
## 155 December 2013 50 Genç M Australia
## 156 December 2013 53 Genç F Germany
## 157 January 2014 40 Genç F France
## 158 September 2015 30 Genç F Australia
## 159 April 2014 45 Genç F United States
## 160 March 2016 28 Genç M Canada
## 161 November 2015 43 Genç M Canada
## 162 December 2013 24 Genç M France
## 163 August 2015 30 Genç F Australia
## 164 June 2014 20 Genç M United States
## 165 December 2015 33 Genç M Australia
## 166 January 2016 35 Genç F United States
## 167 January 2014 58 Genç F Germany
## 168 July 2014 25 Genç F United States
## 169 August 2015 34 Genç M Australia
## 170 October 2015 36 Genç F Germany
## 171 August 2013 27 Genç M United Kingdom
## 172 November 2013 33 Genç F United States
## 173 May 2014 28 Genç F United Kingdom
## 174 March 2016 49 Genç F United States
## 175 February 2014 50 Genç M United States
## 176 October 2015 19 Genç F Germany
## State Product_Category Sub_Category
## 1 Oregon Bike Racks Bike Racks
## 2 New South Wales Bottles and Cages Bottles and Cages
## 3 South Australia Bottles and Cages Bottles and Cages
## 4 England Bottles and Cages Bottles and Cages
## 5 Queensland Bottles and Cages Bottles and Cages
## 6 New York Bottles and Cages Bottles and Cages
## 7 California Bottles and Cages Bottles and Cages
## 8 Washington Bottles and Cages Bottles and Cages
## 9 Oregon Bottles and Cages Bottles and Cages
## 10 England Bottles and Cages Bottles and Cages
## 11 New South Wales Bottles and Cages Bottles and Cages
## 12 California Bottles and Cages Bottles and Cages
## 13 Saarland Bottles and Cages Bottles and Cages
## 14 California Bottles and Cages Bottles and Cages
## 15 Washington Bottles and Cages Bottles and Cages
## 16 California Bottles and Cages Bottles and Cages
## 17 California Bottles and Cages Bottles and Cages
## 18 Queensland Bottles and Cages Bottles and Cages
## 19 New South Wales Bottles and Cages Bottles and Cages
## 20 California Bottles and Cages Bottles and Cages
## 21 British Columbia Bottles and Cages Bottles and Cages
## 22 Victoria Bottles and Cages Bottles and Cages
## 23 Victoria Bottles and Cages Bottles and Cages
## 24 California Bottles and Cages Bottles and Cages
## 25 England Caps Caps
## 26 California Caps Caps
## 27 England Caps Caps
## 28 South Australia Cleaners Cleaners
## 29 Victoria Fenders Fenders
## 30 Queensland Fenders Fenders
## 31 California Fenders Fenders
## 32 New South Wales Fenders Fenders
## 33 Nordrhein-Westfalen Fenders Fenders
## 38 Seine (Paris) Helmets Helmets
## 39 England Helmets Helmets
## 40 Queensland Helmets Helmets
## 41 British Columbia Helmets Helmets
## 42 Oregon Helmets Helmets
## 43 England Helmets Helmets
## 44 British Columbia Helmets Helmets
## 45 Essonne Helmets Helmets
## 46 New South Wales Helmets Helmets
## 47 New South Wales Helmets Helmets
## 48 Hamburg Helmets Helmets
## 49 California Helmets Helmets
## 50 New South Wales Helmets Helmets
## 51 England Helmets Helmets
## 52 California Helmets Helmets
## 53 England Helmets Helmets
## 54 British Columbia Helmets Helmets
## 55 Oregon Helmets Helmets
## 56 British Columbia Helmets Helmets
## 57 California Helmets Helmets
## 58 England Helmets Helmets
## 59 New South Wales Helmets Helmets
## 60 Washington Helmets Helmets
## 61 Washington Helmets Helmets
## 62 England Helmets Helmets
## 63 Oregon Helmets Helmets
## 68 Hessen Jerseys Jerseys
## 69 England Jerseys Jerseys
## 70 New South Wales Jerseys Jerseys
## 71 South Australia Jerseys Jerseys
## 72 British Columbia Jerseys Jerseys
## 73 California Jerseys Jerseys
## 74 Queensland Jerseys Jerseys
## 75 Washington Jerseys Jerseys
## 76 New South Wales Jerseys Jerseys
## 77 Washington Jerseys Jerseys
## 78 Washington Jerseys Jerseys
## 79 England Jerseys Jerseys
## 80 South Australia Jerseys Jerseys
## 81 Queensland Mountain Bikes Mountain Bikes
## 82 California Mountain Bikes Mountain Bikes
## 83 England Mountain Bikes Mountain Bikes
## 84 Nordrhein-Westfalen Mountain Bikes Mountain Bikes
## 85 Queensland Mountain Bikes Mountain Bikes
## 86 England Mountain Bikes Mountain Bikes
## 87 California Mountain Bikes Mountain Bikes
## 88 New South Wales Mountain Bikes Mountain Bikes
## 89 California Mountain Bikes Mountain Bikes
## 90 New South Wales Mountain Bikes Mountain Bikes
## 91 California Mountain Bikes Mountain Bikes
## 92 California Mountain Bikes Mountain Bikes
## 93 Seine (Paris) Mountain Bikes Mountain Bikes
## 94 California Mountain Bikes Mountain Bikes
## 95 California Mountain Bikes Mountain Bikes
## 96 Washington Mountain Bikes Mountain Bikes
## 97 California Mountain Bikes Mountain Bikes
## 98 California Mountain Bikes Mountain Bikes
## 99 Seine Saint Denis Road Bikes Road Bikes
## 100 California Road Bikes Road Bikes
## 101 California Road Bikes Road Bikes
## 102 California Road Bikes Road Bikes
## 103 Washington Road Bikes Road Bikes
## 104 California Road Bikes Road Bikes
## 105 England Road Bikes Road Bikes
## 106 New South Wales Road Bikes Road Bikes
## 107 New South Wales Road Bikes Road Bikes
## 108 New South Wales Road Bikes Road Bikes
## 109 Washington Road Bikes Road Bikes
## 110 Washington Road Bikes Road Bikes
## 111 Queensland Road Bikes Road Bikes
## 112 Yveline Road Bikes Road Bikes
## 113 Nordrhein-Westfalen Road Bikes Road Bikes
## 114 Nordrhein-Westfalen Road Bikes Road Bikes
## 115 Garonne (Haute) Road Bikes Road Bikes
## 116 Washington Road Bikes Road Bikes
## 117 England Road Bikes Road Bikes
## 118 California Road Bikes Road Bikes
## 119 England Shorts Shorts
## 120 California Socks Socks
## 121 New South Wales Tires and Tubes Tires and Tubes
## 122 California Tires and Tubes Tires and Tubes
## 123 Washington Tires and Tubes Tires and Tubes
## 124 California Tires and Tubes Tires and Tubes
## 125 British Columbia Tires and Tubes Tires and Tubes
## 126 New South Wales Tires and Tubes Tires and Tubes
## 127 Oregon Tires and Tubes Tires and Tubes
## 128 British Columbia Tires and Tubes Tires and Tubes
## 129 British Columbia Tires and Tubes Tires and Tubes
## 130 Victoria Tires and Tubes Tires and Tubes
## 131 England Tires and Tubes Tires and Tubes
## 132 Nord Tires and Tubes Tires and Tubes
## 133 Tasmania Tires and Tubes Tires and Tubes
## 134 Oregon Tires and Tubes Tires and Tubes
## 135 Victoria Tires and Tubes Tires and Tubes
## 136 England Tires and Tubes Tires and Tubes
## 137 Victoria Tires and Tubes Tires and Tubes
## 138 California Tires and Tubes Tires and Tubes
## 139 Nordrhein-Westfalen Tires and Tubes Tires and Tubes
## 140 California Tires and Tubes Tires and Tubes
## 141 Washington Tires and Tubes Tires and Tubes
## 142 California Tires and Tubes Tires and Tubes
## 143 British Columbia Tires and Tubes Tires and Tubes
## 144 Oregon Tires and Tubes Tires and Tubes
## 145 California Tires and Tubes Tires and Tubes
## 146 British Columbia Tires and Tubes Tires and Tubes
## 147 England Tires and Tubes Tires and Tubes
## 148 Saarland Tires and Tubes Tires and Tubes
## 149 British Columbia Tires and Tubes Tires and Tubes
## 150 Queensland Tires and Tubes Tires and Tubes
## 151 Hamburg Tires and Tubes Tires and Tubes
## 152 England Tires and Tubes Tires and Tubes
## 153 British Columbia Tires and Tubes Tires and Tubes
## 154 Saarland Tires and Tubes Tires and Tubes
## 155 New South Wales Tires and Tubes Tires and Tubes
## 156 Hessen Tires and Tubes Tires and Tubes
## 157 Nord Tires and Tubes Tires and Tubes
## 158 New South Wales Tires and Tubes Tires and Tubes
## 159 Washington Tires and Tubes Tires and Tubes
## 160 British Columbia Tires and Tubes Tires and Tubes
## 161 British Columbia Tires and Tubes Tires and Tubes
## 162 Essonne Tires and Tubes Tires and Tubes
## 163 Victoria Tires and Tubes Tires and Tubes
## 164 California Tires and Tubes Tires and Tubes
## 165 Victoria Tires and Tubes Tires and Tubes
## 166 Washington Tires and Tubes Tires and Tubes
## 167 Nordrhein-Westfalen Tires and Tubes Tires and Tubes
## 168 Washington Tires and Tubes Tires and Tubes
## 169 New South Wales Tires and Tubes Tires and Tubes
## 170 Bayern Tires and Tubes Tires and Tubes
## 171 England Tires and Tubes Tires and Tubes
## 172 Oregon Tires and Tubes Tires and Tubes
## 173 England Touring Bikes Touring Bikes
## 174 Washington Touring Bikes Touring Bikes
## 175 California Touring Bikes Touring Bikes
## 176 Nordrhein-Westfalen Touring Bikes Touring Bikes
## Product Order_Quantity Unit_Cost Unit_Price Profit
## 1 Hitch Rack - 4-Bike 6 45 120 349
## 2 Water Bottle - 30 oz. 28 2 5 66
## 3 Water Bottle - 30 oz. 25 2 5 53
## 4 Road Bottle Cage 13 3 9 73
## 5 Road Bottle Cage 13 3 9 59
## 6 Water Bottle - 30 oz. 5 2 5 12
## 7 Mountain Bottle Cage 3 4 10 17
## 8 Water Bottle - 30 oz. 13 2 5 25
## 9 Water Bottle - 30 oz. 25 2 5 58
## 10 Water Bottle - 30 oz. 5 2 5 14
## 11 Water Bottle - 30 oz. 28 2 5 66
## 12 Water Bottle - 30 oz. 28 2 5 81
## 13 Road Bottle Cage 14 3 9 61
## 14 Water Bottle - 30 oz. 19 2 5 55
## 15 Mountain Bottle Cage 20 4 10 76
## 16 Water Bottle - 30 oz. 21 2 5 61
## 17 Water Bottle - 30 oz. 19 2 5 55
## 18 Water Bottle - 30 oz. 6 2 5 13
## 19 Water Bottle - 30 oz. 25 2 5 59
## 20 Road Bottle Cage 12 3 9 70
## 21 Water Bottle - 30 oz. 12 2 5 35
## 22 Water Bottle - 30 oz. 12 2 5 23
## 23 Road Bottle Cage 2 3 9 8
## 24 Water Bottle - 30 oz. 4 2 5 12
## 25 AWC Logo Cap 30 7 9 49
## 26 AWC Logo Cap 5 7 9 9
## 27 AWC Logo Cap 4 7 9 7
## 28 Bike Wash - Dissolver 10 3 8 36
## 29 Fender Set - Mountain 9 8 22 84
## 30 Fender Set - Mountain 2 8 22 21
## 31 Fender Set - Mountain 1 8 22 14
## 32 Fender Set - Mountain 18 8 22 201
## 33 Fender Set - Mountain 25 8 22 279
## 38 Sport-100 Helmet, Blue 17 13 35 267
## 39 Sport-100 Helmet, Red 28 13 35 577
## 40 Sport-100 Helmet, Red 11 13 35 180
## 41 Sport-100 Helmet, Blue 24 13 35 520
## 42 Sport-100 Helmet, Red 21 13 35 359
## 43 Sport-100 Helmet, Red 6 13 35 124
## 44 Sport-100 Helmet, Black 6 13 35 130
## 45 Sport-100 Helmet, Red 15 13 35 309
## 46 Sport-100 Helmet, Red 19 13 35 332
## 47 Sport-100 Helmet, Black 1 13 35 17
## 48 Sport-100 Helmet, Red 24 13 35 469
## 49 Sport-100 Helmet, Blue 4 13 35 85
## 50 Sport-100 Helmet, Blue 1 13 35 17
## 51 Sport-100 Helmet, Blue 8 13 35 165
## 52 Sport-100 Helmet, Red 16 13 35 341
## 53 Sport-100 Helmet, Black 18 13 35 371
## 54 Sport-100 Helmet, Black 14 13 35 303
## 55 Sport-100 Helmet, Red 8 13 35 137
## 56 Sport-100 Helmet, Black 8 13 35 173
## 57 Sport-100 Helmet, Black 19 13 35 405
## 58 Sport-100 Helmet, Blue 5 13 35 103
## 59 Sport-100 Helmet, Red 20 13 35 349
## 60 Sport-100 Helmet, Black 20 13 35 286
## 61 Sport-100 Helmet, Black 14 13 35 200
## 62 Sport-100 Helmet, Black 11 13 35 227
## 63 Sport-100 Helmet, Black 22 13 35 376
## 68 Long-Sleeve Logo Jersey, XL 28 38 50 238
## 69 Short-Sleeve Classic Jersey, XL 8 42 54 79
## 70 Short-Sleeve Classic Jersey, L 26 42 54 129
## 71 Long-Sleeve Logo Jersey, XL 4 38 50 12
## 72 Long-Sleeve Logo Jersey, M 11 38 50 127
## 73 Long-Sleeve Logo Jersey, L 10 38 50 110
## 74 Short-Sleeve Classic Jersey, M 12 42 54 40
## 75 Short-Sleeve Classic Jersey, M 20 42 54 2
## 76 Short-Sleeve Classic Jersey, M 15 42 54 75
## 77 Long-Sleeve Logo Jersey, XL 10 38 50 10
## 78 Short-Sleeve Classic Jersey, L 28 42 54 3
## 79 Long-Sleeve Logo Jersey, L 2 38 50 20
## 80 Short-Sleeve Classic Jersey, M 21 42 54 48
## 81 Mountain-100 Black, 48 1 1898 3375 937
## 82 Mountain-200 Silver, 42 1 1266 2320 1008
## 83 Mountain-200 Silver, 46 1 1266 2320 961
## 84 Mountain-200 Silver, 42 1 1266 2320 752
## 85 Mountain-200 Silver, 42 1 1266 2320 683
## 86 Mountain-200 Silver, 46 1 1266 2320 961
## 87 Mountain-400-W Silver, 38 1 420 769 334
## 88 Mountain-200 Black, 38 1 1252 2295 745
## 89 Mountain-200 Black, 38 1 1252 2295 997
## 90 Mountain-500 Silver, 42 1 308 565 184
## 91 Mountain-200 Silver, 42 1 1266 2320 1008
## 92 Mountain-200 Silver, 42 1 1266 2320 1008
## 93 Mountain-400-W Silver, 46 1 420 769 211
## 94 Mountain-200 Silver, 38 1 1266 2320 1008
## 95 Mountain-200 Silver, 38 1 1266 2320 1008
## 96 Mountain-200 Silver, 38 2 1266 2320 1087
## 97 Mountain-200 Black, 46 2 1252 2295 1994
## 98 Mountain-200 Silver, 42 1 1266 2320 1008
## 99 Road-750 Black, 52 2 344 540 198
## 100 Road-150 Red, 44 2 2171 3578 2671
## 101 Road-250 Black, 44 1 1555 2443 839
## 102 Road-550-W Yellow, 44 2 713 1120 769
## 103 Road-650 Red, 58 4 487 783 495
## 104 Road-350-W Yellow, 42 1 1083 1701 584
## 105 Road-250 Black, 52 1 1555 2443 790
## 106 Road-250 Red, 58 2 1555 2443 1141
## 107 Road-150 Red, 62 3 2171 3578 2826
## 108 Road-250 Red, 58 1 1555 2443 570
## 109 Road-750 Black, 48 2 344 540 154
## 110 Road-750 Black, 48 1 344 540 77
## 111 Road-250 Red, 48 1 1519 2443 533
## 112 Road-750 Black, 52 1 344 540 66
## 113 Road-550-W Yellow, 40 1 713 1120 261
## 114 Road-750 Black, 48 1 344 540 126
## 115 Road-350-W Yellow, 44 2 1083 1701 1032
## 116 Road-150 Red, 48 3 2171 3578 1860
## 117 Road-750 Black, 48 2 344 540 349
## 118 Road-150 Red, 48 1 2171 3578 1335
## 119 Women's Mountain Shorts, L 18 26 70 742
## 120 Racing Socks, L 15 3 9 87
## 121 Patch Kit/8 Patches 16 1 2 12
## 122 LL Road Tire 7 8 21 88
## 123 LL Road Tire 7 8 21 59
## 124 Mountain Tire Tube 14 2 5 41
## 125 HL Mountain Tire 25 13 35 541
## 126 LL Mountain Tire 10 9 25 128
## 127 Patch Kit/8 Patches 29 1 2 21
## 128 HL Mountain Tire 20 13 35 433
## 129 Mountain Tire Tube 4 2 5 12
## 130 Touring Tire Tube 15 2 5 29
## 131 Touring Tire Tube 21 2 5 59
## 132 Touring Tire Tube 13 2 5 31
## 133 Patch Kit/8 Patches 13 1 2 8
## 134 ML Mountain Tire 6 11 30 89
## 135 ML Road Tire 14 9 25 151
## 136 LL Mountain Tire 17 9 25 255
## 137 Touring Tire Tube 18 2 5 35
## 138 Mountain Tire Tube 12 2 5 35
## 139 Patch Kit/8 Patches 28 1 2 21
## 140 Patch Kit/8 Patches 14 1 2 13
## 141 Mountain Tire Tube 25 2 5 48
## 142 Patch Kit/8 Patches 3 1 2 3
## 143 ML Road Tire 4 9 25 63
## 144 Mountain Tire Tube 30 2 5 69
## 145 ML Mountain Tire 26 11 30 478
## 146 Road Tire Tube 9 1 4 27
## 147 Patch Kit/8 Patches 2 1 2 2
## 148 Patch Kit/8 Patches 6 1 2 4
## 149 Patch Kit/8 Patches 29 1 2 28
## 150 Patch Kit/8 Patches 8 1 2 5
## 151 Patch Kit/8 Patches 26 1 2 22
## 152 Road Tire Tube 28 1 4 80
## 153 Mountain Tire Tube 6 2 5 18
## 154 Patch Kit/8 Patches 30 1 2 19
## 155 Road Tire Tube 23 1 4 57
## 156 ML Mountain Tire 14 11 30 237
## 157 Patch Kit/8 Patches 19 1 2 14
## 158 Mountain Tire Tube 25 2 5 59
## 159 Patch Kit/8 Patches 16 1 2 9
## 160 ML Mountain Tire 19 11 30 355
## 161 Mountain Tire Tube 2 2 5 6
## 162 Touring Tire Tube 3 2 5 8
## 163 Patch Kit/8 Patches 4 1 2 2
## 164 Road Tire Tube 7 1 4 20
## 165 LL Road Tire 29 8 21 249
## 166 Mountain Tire Tube 10 2 5 19
## 167 Road Tire Tube 16 1 4 40
## 168 Touring Tire Tube 4 2 5 8
## 169 Mountain Tire Tube 28 2 5 66
## 170 Patch Kit/8 Patches 28 1 2 27
## 171 LL Road Tire 3 8 21 36
## 172 Road Tire Tube 7 1 4 17
## 173 Touring-2000 Blue, 54 1 755 1215 411
## 174 Touring-2000 Blue, 46 1 755 1215 193
## 175 Touring-1000 Blue, 60 1 1482 2384 854
## 176 Touring-3000 Blue, 54 3 461 742 554
## Cost Revenue Profit_kat Unit_Cost_log Revenue_log Revenue_kok Revenue_merk
## 1 270 619 Az 1.6532125 2.7916906 24.879711 -171.301136
## 2 56 122 Az 0.3010300 2.0863598 11.045361 -668.301136
## 3 50 103 Az 0.3010300 2.0128372 10.148892 -687.301136
## 4 39 112 Az 0.4771213 2.0492180 10.583005 -678.301136
## 5 39 98 Az 0.4771213 1.9912261 9.899495 -692.301136
## 6 10 22 Az 0.3010300 1.3424227 4.690416 -768.301136
## 7 12 29 Az 0.6020600 1.4623980 5.385165 -761.301136
## 8 26 51 Az 0.3010300 1.7075702 7.141428 -739.301136
## 9 50 108 Az 0.3010300 2.0334238 10.392305 -682.301136
## 10 10 24 Az 0.3010300 1.3802112 4.898979 -766.301136
## 11 56 122 Az 0.3010300 2.0863598 11.045361 -668.301136
## 12 56 137 Az 0.3010300 2.1367206 11.704700 -653.301136
## 13 42 103 Az 0.4771213 2.0128372 10.148892 -687.301136
## 14 38 93 Az 0.3010300 1.9684829 9.643651 -697.301136
## 15 80 156 Az 0.6020600 2.1931246 12.489996 -634.301136
## 16 42 103 Az 0.3010300 2.0128372 10.148892 -687.301136
## 17 38 93 Az 0.3010300 1.9684829 9.643651 -697.301136
## 18 12 25 Az 0.3010300 1.3979400 5.000000 -765.301136
## 19 50 109 Az 0.3010300 2.0374265 10.440307 -681.301136
## 20 36 106 Az 0.4771213 2.0253059 10.295630 -684.301136
## 21 24 59 Az 0.3010300 1.7708520 7.681146 -731.301136
## 22 24 47 Az 0.3010300 1.6720979 6.855655 -743.301136
## 23 6 14 Az 0.4771213 1.1461280 3.741657 -776.301136
## 24 8 20 Az 0.3010300 1.3010300 4.472136 -770.301136
## 25 210 259 Az 0.8450980 2.4132998 16.093477 -531.301136
## 26 35 44 Az 0.8450980 1.6434527 6.633250 -746.301136
## 27 28 35 Az 0.8450980 1.5440680 5.916080 -755.301136
## 28 30 66 Az 0.4771213 1.8195439 8.124038 -724.301136
## 29 72 156 Az 0.9030900 2.1931246 12.489996 -634.301136
## 30 16 37 Az 0.9030900 1.5682017 6.082763 -753.301136
## 31 8 22 Az 0.9030900 1.3424227 4.690416 -768.301136
## 32 144 345 Az 0.9030900 2.5378191 18.574176 -445.301136
## 33 200 479 Az 0.9030900 2.6803355 21.886069 -311.301136
## 38 221 488 Az 1.1139434 2.6884198 22.090722 -302.301136
## 39 364 941 Az 1.1139434 2.9735896 30.675723 150.698864
## 40 143 323 Az 1.1139434 2.5092025 17.972201 -467.301136
## 41 312 832 Az 1.1139434 2.9201233 28.844410 41.698864
## 42 273 632 Az 1.1139434 2.8007171 25.139610 -158.301136
## 43 78 202 Az 1.1139434 2.3053514 14.212670 -588.301136
## 44 78 208 Az 1.1139434 2.3180633 14.422205 -582.301136
## 45 195 504 Az 1.1139434 2.7024305 22.449944 -286.301136
## 46 247 579 Az 1.1139434 2.7626786 24.062419 -211.301136
## 47 13 30 Az 1.1139434 1.4771213 5.477226 -760.301136
## 48 312 781 Az 1.1139434 2.8926510 27.946377 -9.301136
## 49 52 137 Az 1.1139434 2.1367206 11.704700 -653.301136
## 50 13 30 Az 1.1139434 1.4771213 5.477226 -760.301136
## 51 104 269 Az 1.1139434 2.4297523 16.401219 -521.301136
## 52 208 549 Az 1.1139434 2.7395723 23.430749 -241.301136
## 53 234 605 Az 1.1139434 2.7817554 24.596748 -185.301136
## 54 182 485 Az 1.1139434 2.6857417 22.022716 -305.301136
## 55 104 241 Az 1.1139434 2.3820170 15.524175 -549.301136
## 56 104 277 Az 1.1139434 2.4424798 16.643317 -513.301136
## 57 247 652 Az 1.1139434 2.8142476 25.534291 -138.301136
## 58 65 168 Az 1.1139434 2.2253093 12.961481 -622.301136
## 59 260 609 Az 1.1139434 2.7846173 24.677925 -181.301136
## 60 260 546 Az 1.1139434 2.7371926 23.366643 -244.301136
## 61 182 382 Az 1.1139434 2.5820634 19.544820 -408.301136
## 62 143 370 Az 1.1139434 2.5682017 19.235384 -420.301136
## 63 286 662 Az 1.1139434 2.8208580 25.729361 -128.301136
## 68 1064 1302 Az 1.5797836 3.1146110 36.083237 511.698864
## 69 336 415 Az 1.6232493 2.6180481 20.371549 -375.301136
## 70 1092 1221 Az 1.6232493 3.0867157 34.942810 430.698864
## 71 152 164 Az 1.5797836 2.2148438 12.806248 -626.301136
## 72 418 545 Az 1.5797836 2.7363965 23.345235 -245.301136
## 73 380 490 Az 1.5797836 2.6901961 22.135944 -300.301136
## 74 504 544 Az 1.6232493 2.7355989 23.323808 -246.301136
## 75 840 842 Az 1.6232493 2.9253121 29.017236 51.698864
## 76 630 705 Az 1.6232493 2.8481891 26.551836 -85.301136
## 77 380 390 Az 1.5797836 2.5910646 19.748418 -400.301136
## 78 1176 1179 Az 1.6232493 3.0715138 34.336569 388.698864
## 79 76 96 Az 1.5797836 1.9822712 9.797959 -694.301136
## 80 882 930 Az 1.6232493 2.9684829 30.495901 139.698864
## 81 1898 2835 Az 3.2782962 3.4525531 53.244718 2044.698864
## 82 1266 2274 Az 3.1024337 3.3567905 47.686476 1483.698864
## 83 1266 2227 Az 3.1024337 3.3477202 47.191101 1436.698864
## 84 1266 2018 Az 3.1024337 3.3049212 44.922155 1227.698864
## 85 1266 1949 Az 3.1024337 3.2898118 44.147480 1158.698864
## 86 1266 2227 Az 3.1024337 3.3477202 47.191101 1436.698864
## 87 420 754 Az 2.6232493 2.8773713 27.459060 -36.301136
## 88 1252 1997 Az 3.0976043 3.3003781 44.687806 1206.698864
## 89 1252 2249 Az 3.0976043 3.3519895 47.423623 1458.698864
## 90 308 492 Az 2.4885507 2.6919651 22.181073 -298.301136
## 91 1266 2274 Az 3.1024337 3.3567905 47.686476 1483.698864
## 92 1266 2274 Az 3.1024337 3.3567905 47.686476 1483.698864
## 93 420 631 Az 2.6232493 2.8000294 25.119713 -159.301136
## 94 1266 2274 Az 3.1024337 3.3567905 47.686476 1483.698864
## 95 1266 2274 Az 3.1024337 3.3567905 47.686476 1483.698864
## 96 2532 3619 Az 3.1024337 3.5585886 60.158125 2828.698864
## 97 2504 4498 Orta 3.0976043 3.6530195 67.067131 3707.698864
## 98 1266 2274 Az 3.1024337 3.3567905 47.686476 1483.698864
## 99 688 886 Az 2.5365584 2.9474337 29.765752 95.698864
## 100 4342 7013 Orta 3.3366598 3.8459038 83.743656 6222.698864
## 101 1555 2394 Az 3.1917304 3.3791241 48.928519 1603.698864
## 102 1426 2195 Az 2.8530895 3.3414345 46.850827 1404.698864
## 103 1948 2443 Az 2.6875290 3.3879235 49.426713 1652.698864
## 104 1083 1667 Az 3.0346285 3.2219356 40.828911 876.698864
## 105 1555 2345 Az 3.1917304 3.3701428 48.425200 1554.698864
## 106 3110 4251 Az 3.1917304 3.6284911 65.199693 3460.698864
## 107 6513 9339 Cok 3.3366598 3.9703004 96.638502 8548.698864
## 108 1555 2125 Az 3.1917304 3.3273589 46.097722 1334.698864
## 109 688 842 Az 2.5365584 2.9253121 29.017236 51.698864
## 110 344 421 Az 2.5365584 2.6242821 20.518285 -369.301136
## 111 1519 2052 Az 3.1815578 3.3121774 45.299007 1261.698864
## 112 344 410 Az 2.5365584 2.6127839 20.248457 -380.301136
## 113 713 974 Az 2.8530895 2.9885590 31.208973 183.698864
## 114 344 470 Az 2.5365584 2.6720979 21.679483 -320.301136
## 115 2166 3198 Az 3.0346285 3.5048785 56.550862 2407.698864
## 116 6513 8373 Orta 3.3366598 3.9228811 91.504098 7582.698864
## 117 688 1037 Az 2.5365584 3.0157788 32.202484 246.698864
## 118 2171 3506 Az 3.3366598 3.5448119 59.211485 2715.698864
## 119 468 1210 Az 1.4149733 3.0827854 34.785054 419.698864
## 120 45 132 Az 0.4771213 2.1205739 11.489125 -658.301136
## 121 16 28 Az 0.0000000 1.4471580 5.291503 -762.301136
## 122 56 144 Az 0.9030900 2.1583625 12.000000 -646.301136
## 123 56 115 Az 0.9030900 2.0606978 10.723805 -675.301136
## 124 28 69 Az 0.3010300 1.8388491 8.306624 -721.301136
## 125 325 866 Az 1.1139434 2.9375179 29.427878 75.698864
## 126 90 218 Az 0.9542425 2.3384565 14.764823 -572.301136
## 127 29 50 Az 0.0000000 1.6989700 7.071068 -740.301136
## 128 260 693 Az 1.1139434 2.8407332 26.324893 -97.301136
## 129 8 20 Az 0.3010300 1.3010300 4.472136 -770.301136
## 130 30 59 Az 0.3010300 1.7708520 7.681146 -731.301136
## 131 42 101 Az 0.3010300 2.0043214 10.049876 -689.301136
## 132 26 57 Az 0.3010300 1.7558749 7.549834 -733.301136
## 133 13 21 Az 0.0000000 1.3222193 4.582576 -769.301136
## 134 66 155 Az 1.0413927 2.1903317 12.449900 -635.301136
## 135 126 277 Az 0.9542425 2.4424798 16.643317 -513.301136
## 136 153 408 Az 0.9542425 2.6106602 20.199010 -382.301136
## 137 36 71 Az 0.3010300 1.8512583 8.426150 -719.301136
## 138 24 59 Az 0.3010300 1.7708520 7.681146 -731.301136
## 139 28 49 Az 0.0000000 1.6901961 7.000000 -741.301136
## 140 14 27 Az 0.0000000 1.4313638 5.196152 -763.301136
## 141 50 98 Az 0.3010300 1.9912261 9.899495 -692.301136
## 142 3 6 Az 0.0000000 0.7781513 2.449490 -784.301136
## 143 36 99 Az 0.9542425 1.9956352 9.949874 -691.301136
## 144 60 129 Az 0.3010300 2.1105897 11.357817 -661.301136
## 145 286 764 Az 1.0413927 2.8830934 27.640550 -26.301136
## 146 9 36 Az 0.0000000 1.5563025 6.000000 -754.301136
## 147 2 4 Az 0.0000000 0.6020600 2.000000 -786.301136
## 148 6 10 Az 0.0000000 1.0000000 3.162278 -780.301136
## 149 29 57 Az 0.0000000 1.7558749 7.549834 -733.301136
## 150 8 13 Az 0.0000000 1.1139434 3.605551 -777.301136
## 151 26 48 Az 0.0000000 1.6812412 6.928203 -742.301136
## 152 28 108 Az 0.0000000 2.0334238 10.392305 -682.301136
## 153 12 30 Az 0.3010300 1.4771213 5.477226 -760.301136
## 154 30 49 Az 0.0000000 1.6901961 7.000000 -741.301136
## 155 23 80 Az 0.0000000 1.9030900 8.944272 -710.301136
## 156 154 391 Az 1.0413927 2.5921768 19.773720 -399.301136
## 157 19 33 Az 0.0000000 1.5185139 5.744563 -757.301136
## 158 50 109 Az 0.3010300 2.0374265 10.440307 -681.301136
## 159 16 25 Az 0.0000000 1.3979400 5.000000 -765.301136
## 160 209 564 Az 1.0413927 2.7512791 23.748684 -226.301136
## 161 4 10 Az 0.3010300 1.0000000 3.162278 -780.301136
## 162 6 14 Az 0.3010300 1.1461280 3.741657 -776.301136
## 163 4 6 Az 0.0000000 0.7781513 2.449490 -784.301136
## 164 7 27 Az 0.0000000 1.4313638 5.196152 -763.301136
## 165 232 481 Az 0.9030900 2.6821451 21.931712 -309.301136
## 166 20 39 Az 0.3010300 1.5910646 6.244998 -751.301136
## 167 16 56 Az 0.0000000 1.7481880 7.483315 -734.301136
## 168 8 16 Az 0.3010300 1.2041200 4.000000 -774.301136
## 169 56 122 Az 0.3010300 2.0863598 11.045361 -668.301136
## 170 28 55 Az 0.0000000 1.7403627 7.416198 -735.301136
## 171 24 60 Az 0.9030900 1.7781513 7.745967 -730.301136
## 172 7 24 Az 0.0000000 1.3802112 4.898979 -766.301136
## 173 755 1166 Az 2.8779470 3.0666986 34.146742 375.698864
## 174 755 948 Az 2.8779470 2.9768083 30.789609 157.698864
## 175 1482 2336 Az 3.1708482 3.3684728 48.332184 1545.698864
## 176 1383 1937 Az 2.6637009 3.2871296 44.011362 1146.698864
## Revenue_kok_merk
## 1 3.07396056
## 2 -10.76038903
## 3 -11.65685849
## 4 -11.22274481
## 5 -11.90625511
## 6 -17.11533429
## 7 -16.42058524
## 8 -14.66432162
## 9 -11.41344521
## 10 -16.90677057
## 11 -10.76038903
## 12 -10.10105014
## 13 -11.65685849
## 14 -12.16209929
## 15 -9.31575405
## 16 -11.65685849
## 17 -12.16209929
## 18 -16.80575005
## 19 -11.36544354
## 20 -11.51011991
## 21 -14.12460430
## 22 -14.95009545
## 23 -18.06409266
## 24 -17.33361410
## 25 -5.71227311
## 26 -15.17250047
## 27 -15.88967027
## 28 -13.68171165
## 29 -9.31575405
## 30 -15.72298752
## 31 -17.11533429
## 32 -3.23157443
## 33 0.08031858
## 38 0.28497198
## 39 8.86997325
## 40 -3.83354930
## 41 7.03866015
## 42 3.33386013
## 43 -7.59307965
## 44 -7.38354495
## 45 0.64419427
## 46 2.25666878
## 47 -16.32852448
## 48 6.14062717
## 49 -10.10105014
## 50 -16.32852448
## 51 -5.40453058
## 52 1.62499898
## 53 2.79099770
## 54 0.21696549
## 55 -6.28157535
## 56 -5.16243307
## 57 3.72854062
## 58 -8.84426865
## 59 2.87217531
## 60 1.56089284
## 61 -2.26092977
## 62 -2.57036599
## 63 3.92361061
## 68 14.27748705
## 69 -1.43420126
## 70 13.13706037
## 71 -8.99950158
## 72 1.53948501
## 73 0.33019357
## 74 1.51805753
## 75 7.21148621
## 76 4.74608604
## 77 -2.05733239
## 78 12.53081938
## 79 -12.00779108
## 80 8.69015131
## 81 31.43896800
## 82 25.88072602
## 83 25.38535080
## 84 23.11640484
## 85 22.34173006
## 86 25.38535080
## 87 5.65331038
## 88 22.88205589
## 89 25.61787275
## 90 0.37532296
## 91 25.88072602
## 92 25.88072602
## 93 3.31396332
## 94 25.88072602
## 95 25.88072602
## 96 38.35237492
## 97 45.26138050
## 98 25.88072602
## 99 7.96000208
## 100 61.93790642
## 101 27.12276924
## 102 25.04507705
## 103 27.62096337
## 104 19.02316127
## 105 26.61945000
## 106 43.39394320
## 107 74.83275159
## 108 24.29197224
## 109 7.21148621
## 110 -1.28746552
## 111 23.49325656
## 112 -1.55729332
## 113 9.40322302
## 114 -0.12626666
## 115 34.74511201
## 116 69.69834822
## 117 10.39673433
## 118 37.40573532
## 119 12.97930421
## 120 -10.31662476
## 121 -16.51424743
## 122 -9.80575005
## 123 -11.08194476
## 124 -13.49912619
## 125 7.62212789
## 126 -7.04092699
## 127 -14.73468224
## 128 4.51914311
## 129 -17.33361410
## 130 -14.12460430
## 131 -11.75587443
## 132 -14.25591562
## 133 -17.22317436
## 134 -9.35585045
## 135 -5.16243307
## 136 -1.60674017
## 137 -13.37960028
## 138 -14.12460430
## 139 -14.80575005
## 140 -16.60959763
## 141 -11.90625511
## 142 -19.35626031
## 143 -11.85587568
## 144 -10.44793336
## 145 5.83479987
## 146 -15.80575005
## 147 -19.80575005
## 148 -18.64347239
## 149 -14.25591562
## 150 -18.20019878
## 151 -14.87754682
## 152 -11.41344521
## 153 -16.32852448
## 154 -14.80575005
## 155 -12.86147814
## 156 -2.03203012
## 157 -16.06118740
## 158 -11.36544354
## 159 -16.80575005
## 160 1.94293412
## 161 -18.64347239
## 162 -18.06409266
## 163 -19.35626031
## 164 -16.60959763
## 165 0.12596215
## 166 -15.56075205
## 167 -14.32243528
## 168 -17.80575005
## 169 -10.76038903
## 170 -14.38955156
## 171 -14.05978336
## 172 -16.90677057
## 173 12.34099211
## 174 8.98385859
## 175 26.52643384
## 176 22.20561212
fit2_res_test<-as.data.frame(t(defaultSummary(data.frame(obs=test$Unit_Cost_log,pred=predict(fit2,test)))))
rownames(fit2_res_test)<-"fit2"
fit3_res_test<-as.data.frame(t(defaultSummary(data.frame(obs=test$Unit_Cost_log,pred=predict(fit3,test)))))
rownames(fit3_res_test)<-"fit3"
fit4_res_test<-as.data.frame(t(defaultSummary(data.frame(obs=test$Unit_Cost_log,pred=predict(fit4,test)))))
rownames(fit4_res_test)<-"fit4"
fit5_res_test<-as.data.frame(t(defaultSummary(data.frame(obs=test$Unit_Cost_log,pred=predict(fit5,test)))))
rownames(fit5_res_test)<-"fit5"
round(rbind(fit2_res_test,fit3_res_test,fit4_res_test,fit5_res_test),2)
## RMSE Rsquared MAE
## fit2 0.47 0.81 0.38
## fit3 0.44 0.84 0.35
## fit4 0.75 0.51 0.57
## fit5 0.75 0.51 0.57
##R-squared fit 3 değeri traindekine yakın olduğu için rahatlıkla bu modelde kalabiliyoruz##
list2<-list(fit2,fit3,fit4,fit5)
PRESS <- function(linmodel) { pr <- residuals(linmodel)/(1 - lm.influence(linmodel)$hat)
sum(pr^2)
}
for (i in list2) {
print(paste("Press:",round(PRESS(i),3)))
}
## [1] "Press: 179.659"
## [1] "Press: 155.027"
## [1] "Press: 368.767"
## [1] "Press: 367.739"
library(ggfortify)
autoplot(fit2)
##R consoleda gördüğümüz üzere de fit3 en küçük olduğu için en iyi model olduğunu doğruluyoruz##
##CART - Regression##
library(rpart)
library(rpart.plot)
cart<-rpart(Unit_Cost_log~Revenue_kok+Cost+Customer_Gender , data=train)
cart$variable.importance
## Cost Revenue_kok
## 705.7746 575.2014
rpart.plot(cart)
##Bu fonksiyona homo-hetero durumlarını inceleterek 5.adımda bulduğumuz gini ve entropi ile bağımlı değişkeni en iyi ifade eden değişkeni seçtirip bölümleme yaptırdık. ##Bağımlı değişkenim unit_cost_log (total maaliyet) numeric olduğu için regresyon modellemesi yaptım. (kategorik olsaydı sınıflandırma yapardım)##
#train icin:
defaultSummary(data.frame(obs=train$Unit_Cost_log,pred=predict(cart,train)))
## RMSE Rsquared MAE
## 0.3436556 0.8957744 0.2466144
#test icin:
defaultSummary(data.frame(obs=test$Unit_Cost_log,pred=predict(cart,test)))
## RMSE Rsquared MAE
## 0.3200610 0.9106522 0.2300350